diff --git a/README.md b/README.md index e3e31e74..38466932 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,9 @@ Also, pyGenomeTracks can be installed using pip $ pip install pyGenomeTracks ``` +Since version 3.5, pyGenomeTracks uses BEDTools, don't forget to install it or load it into your environment + + If the latest version wants to be installed use: ```bash diff --git a/docs/content/all_default_properties_rst.txt b/docs/content/all_default_properties_rst.txt index 7b8d962a..81e0a467 100644 --- a/docs/content/all_default_properties_rst.txt +++ b/docs/content/all_default_properties_rst.txt @@ -1,62 +1,62 @@ -========================= ========================= ========================= ========================= ========================= ========================= ========================= ========================= ========================= ========================= ========================= ========================= ========================= ========================= -parameter :doc:`tracks/x_axis` :doc:`tracks/epilogos` :doc:`tracks/links` :doc:`tracks/domains` :doc:`tracks/bed` :doc:`tracks/gtf` :doc:`tracks/narrow_peak` :doc:`tracks/bigwig` :doc:`tracks/bedgraph` :doc:`tracks/bedgraph_mat :doc:`tracks/hlines` :doc:`tracks/hic_matrix` :doc:`tracks/scalebar` -========================= ========================= ========================= ========================= ========================= ========================= ========================= ========================= ========================= ========================= ========================= ========================= ========================= ========================= -overlay_previous no no no no no no no no no no no no no -where bottom left -fontsize 15 12 12 12 -categories_file not set -orientation not set not set not set not set not set not set not set not set not set not set not set -links_type arcs -line_width not set 0.5 0.5 0.5 1 0.5 0.5 -line_style solid solid -color blue #1f78b4 #1f78b4 #1f78b4 
#FF000080 #33a02c #a6cee3 black black -alpha 0.8 1 1 1 1 -max_value not set not set not set not set not set not set not set not set not set -min_value not set not set not set not set not set not set not set not set -ylim not set -compact_arcs_level 0 -use_middle false false -border_color black black black -prefered_name transcript_name transcript_name transcript_name -merge_transcripts false false false -labels true true -style flybase flybase -display stacked stacked -max_labels 60 60 -global_max_row false false -gene_rows not set not set -arrow_interval 2 2 -arrowhead_included false false -color_utr grey grey -height_utr 1 1 -arrow_length not set not set -all_labels_inside false false -labels_in_margin false false -show_data_range true true true true true -show_labels true -use_summit true -width_adjust 1.5 -type peak fill fill matrix -negative_color not set not set -nans_to_zeros false false -summary_method mean not set -number_of_bins 700 700 -transform no no no -log_pseudocount 0 0 -y_axis_values transformed transformed -second_file* not set not set -operation* file file -grid false false -rasterize false true true -pos_score_in_bin center -plot_horizontal_lines false -colormap viridis RdYlBu_r -depth 100000 -show_masked_bins false -scale_factor 1 -x_center not set -size not setparameter :doc:`tracks/x_axis` :doc:`tracks/epilogos` :doc:`tracks/links` :doc:`tracks/domains` :doc:`tracks/bed` :doc:`tracks/gtf` :doc:`tracks/narrow_peak` :doc:`tracks/bigwig` :doc:`tracks/bedgraph` :doc:`tracks/bedgraph_matrix` :doc:`tracks/hlines` :doc:`tracks/hic_matrix` :doc:`tracks/scalebar` +============================= ============================= ============================= ============================= ============================= ============================= ============================= ============================= ============================= ============================= ============================= ============================= ============================= 
============================= +overlay_previous no no no no no no no no no no no no no +where bottom left +fontsize 15 12 12 12 +categories_file not set +orientation not set not set not set not set not set not set not set not set not set not set not set +links_type arcs +line_width not set 0.5 0.5 0.5 1 0.5 0.5 +line_style solid solid +color blue #1f78b4 #1f78b4 #1f78b4 #FF000080 #33a02c #a6cee3 black black +alpha 0.8 1 1 1 1 +max_value not set not set not set not set not set not set not set not set not set +min_value not set not set not set not set not set not set not set not set +ylim not set +compact_arcs_level 0 +use_middle false false +border_color black black black +prefered_name transcript_name transcript_name transcript_name +merge_transcripts false false false +labels true true +style flybase flybase +display stacked stacked +max_labels 60 60 +global_max_row false false +gene_rows not set not set +arrow_interval 2 2 +arrowhead_included false false +color_utr grey grey +height_utr 1 1 +arrow_length not set not set +all_labels_inside false false +labels_in_margin false false +show_data_range true true true true true +show_labels true +use_summit true +width_adjust 1.5 +type peak fill fill matrix +negative_color not set not set +nans_to_zeros false false +summary_method mean not set +number_of_bins 700 700 +transform no no no +log_pseudocount 0 0 +y_axis_values transformed transformed +second_file* not set not set +operation* file file +grid false false +rasterize false true true +pos_score_in_bin center +plot_horizontal_lines false +colormap viridis RdYlBu_r +depth 100000 +show_masked_bins false +scale_factor 1 +x_center not set +size not set +============================= ============================= ============================= ============================= ============================= ============================= ============================= ============================= ============================= ============================= 
============================= ============================= ============================= ============================= \* While pyGenomeTracks can convert coverage tracks on the fly, this might be a time-consuming step, especially on large files and if you want to replot many times. In this situation, we recommend using the deepTools suite to convert your files in advance. For example `bamCoverage `_ or `bamCompare `_ \ No newline at end of file diff --git a/docs/content/faq.rst b/docs/content/faq.rst index 5795679a..c7cdbf3e 100644 --- a/docs/content/faq.rst +++ b/docs/content/faq.rst @@ -4,8 +4,8 @@ FAQ .. contents:: :local: -Why the scale of my Hi-C plot suddenly changed ----------------------------------------------- +Why the scale of my Hi-C plot suddenly changed? +----------------------------------------------- pyGenomeTracks is using `HiCMatrix `_ to read the matrix from ``h5`` and ``cool`` format. From version 12 to version 13, a normalization step when reading ``cool`` file was removed. This normalization was mostly used when you were providing ``cool`` file from `cooler balance `_. @@ -13,3 +13,18 @@ when you were providing ``cool`` file from `cooler balance `_ to read the matrix from ``cool`` format. +Unfortunately, a bug introduced in version 14 caused the correction factors to be ignored. +This bug was fixed in version 15, so updating HiCMatrix to the latest version should fix it. 
diff --git a/docs/content/installation.rst b/docs/content/installation.rst index a6f5ad7e..2a052a3c 100644 --- a/docs/content/installation.rst +++ b/docs/content/installation.rst @@ -10,14 +10,22 @@ Remember -- pyGenomeTracks is available for **command line usage** as well as fo Requirements ------------- -* Python >=3.6 +Python dependencies: + +* Python >= 3.6 * numpy >= 1.16 -* intervaltree >=2.1.0 -* pyBigWig >= 0.3.4 -* hicmatrix >= 0.14 -* pysam >= 0.8 -* matplotlib >= 3.1.1 -* gffutils >=0.9 +* intervaltree >= 2.1.0 +* pyBigWig >= 0.3.16 +* hicmatrix >= 15 +* pysam >= 0.14 +* matplotlib == 3.1.1 +* gffutils >= 0.9 +* pybedtools >= 0.8.1 +* tqdm >= 4.20 + +External dependencies: + +* BEDTools Command line installation using ``conda`` ----------------------------------------- @@ -44,6 +52,8 @@ Install pyGenomeTracks using the following command: All python requirements should be automatically installed. +Since version 3.5, pyGenomeTracks requires BEDTools; do not forget to install it or load it into your environment. + If you need to specify a specific path for the installation of the tools, make use of `pip install`'s numerous options: .. code:: bash @@ -55,7 +65,7 @@ Command line installation without ``pip`` You are highly recommended to use `conda install` rather than the following complicated steps. -1. Install the requirements listed above in the "requirements" section. This is done automatically by `pip`. +1. Install the requirements listed above in the "requirements" section. This is done automatically by `pip` (except BEDTools). 2. Download source code :: diff --git a/docs/content/releases.rst b/docs/content/releases.rst index 3f0ae1a7..f7bc8877 100644 --- a/docs/content/releases.rst +++ b/docs/content/releases.rst @@ -4,6 +4,7 @@ Releases .. 
toctree:: :maxdepth: 1 + releases/3.5.1 releases/3.5 releases/3.4 releases/3.3 diff --git a/docs/content/releases/3.5.1.rst b/docs/content/releases/3.5.1.rst new file mode 100644 index 00000000..c02e3e69 --- /dev/null +++ b/docs/content/releases/3.5.1.rst @@ -0,0 +1,13 @@ +3.5.1 +===== + +Bugfixes: +^^^^^^^^^ + +- Get a message when BEDTools is not installed instead of crashing without any message. + +- Always test if a bedgraph is tabix indexed without checking the extension. + +- Fix a bug which was happening when ``operation`` or ``summary_method`` was used on bedgraph whereas the bedgraph had some missing intervals. + +- Enforcing version 15 of HiCMatrix. Version 14 had a bug concerning the application of the correction factors of cool files. diff --git a/environment.yml b/environment.yml index da9796ed..5cd6e629 100644 --- a/environment.yml +++ b/environment.yml @@ -11,7 +11,7 @@ dependencies: - intervaltree >=2.1.0 - pybigwig >=0.3.16 - future >=0.17.0 - - hicmatrix >=13 + - hicmatrix >=15 - pysam >=0.14 - pytest - gffutils >=0.9 diff --git a/pygenometracks/_version.py b/pygenometracks/_version.py index 1027b84a..2c9603d5 100644 --- a/pygenometracks/_version.py +++ b/pygenometracks/_version.py @@ -2,4 +2,4 @@ # This file is originally generated from Git information by running 'setup.py # version'. Distribution tarballs contain a pre-generated copy of this file. 
-__version__ = '3.5' +__version__ = '3.5.1' diff --git a/pygenometracks/getAllDefaultsAndPossible.py b/pygenometracks/getAllDefaultsAndPossible.py index 43ace338..e560c61e 100644 --- a/pygenometracks/getAllDefaultsAndPossible.py +++ b/pygenometracks/getAllDefaultsAndPossible.py @@ -85,7 +85,7 @@ def main(): # For the default they are summarized in a matrix mat = np.empty((len(all_default_parameters) + 2, len(all_tracks_with_default) + 1), - dtype='U25') + dtype='U100') mat[0, 0] = 'parameter' mat[1, 0] = '--' for j, track_type in enumerate(all_tracks_with_default, start=1): diff --git a/pygenometracks/tests/generateAllOutput.sh b/pygenometracks/tests/generateAllOutput.sh index 92350ec5..deeb1fac 100644 --- a/pygenometracks/tests/generateAllOutput.sh +++ b/pygenometracks/tests/generateAllOutput.sh @@ -34,6 +34,7 @@ bin/pgt --tracks ./pygenometracks/tests/test_data/bedgraph_useMid.ini --region c bin/pgt --tracks ./pygenometracks/tests/test_data/operation_bdg.ini --region X:2700000-3100000 --trackLabelFraction 0.2 --dpi 130 -o ./pygenometracks/tests/test_data/master_operation_bdg.png bin/pgt --tracks ./pygenometracks/tests/test_data/bedgraph_withNA.ini --region X:2700000-3100000 --trackLabelFraction 0.2 --dpi 130 -o ./pygenometracks/tests/test_data/master_bedgraph_withNA.png bin/pgt --tracks ./pygenometracks/tests/test_data/bedgraph_negative.ini --region X:2700000-3100000 --trackLabelFraction 0.2 --dpi 130 -o ./pygenometracks/tests/test_data/master_negative.png +bin/pgt --tracks ./pygenometracks/tests/test_data/bedgraph_end_not_covered.ini --region chr7:100-400 --trackLabelFraction 0.2 --dpi 130 -o ./pygenometracks/tests/test_data/master_bedgraph_end_not_covered.png # test bigWigTrack: bin/pgt --tracks ./pygenometracks/tests/test_data/bigwig.ini --region X:2700000-3100000 --trackLabelFraction 0.2 --dpi 130 -o ./pygenometracks/tests/test_data/master_bigwig.png diff --git a/pygenometracks/tests/test_bedGraphTrack.py b/pygenometracks/tests/test_bedGraphTrack.py index 
29c986c7..c889522b 100644 --- a/pygenometracks/tests/test_bedGraphTrack.py +++ b/pygenometracks/tests/test_bedGraphTrack.py @@ -214,6 +214,18 @@ with open(os.path.join(ROOT, "log1pm_bedgraph.ini"), 'w') as fh: fh.write(log1p_with_neg) + +bedgraph_end_not_covered = """ +[bedgraph] +file = simple.bdg +height = 3 +summary_method = max + +[x-axis] +""" +with open(os.path.join(ROOT, "bedgraph_end_not_covered.ini"), 'w') as fh: + fh.write(bedgraph_end_not_covered) + tolerance = 13 # default matplotlib pixed difference tolerance @@ -268,6 +280,14 @@ def test_plot_bedgraph_tracks_rasterize(): ini_file = os.path.join(ROOT, "bedgraph_useMid.ini") region = "chr2:73,800,000-75,744,000" expected_file = os.path.join(ROOT, 'master_bedgraph_useMid.pdf') + # matplotlib compare on pdf will create a png next to it. + # To avoid issues related to write in test_data folder + # We copy the expected file into a temporary place + new_expected_file = NamedTemporaryFile(suffix='.pdf', + prefix='pyGenomeTracks_test_', + delete=False) + os.system(f'cp {expected_file} {new_expected_file.name}') + expected_file = new_expected_file.name args = f"--tracks {ini_file} --region {region} "\ "--trackLabelFraction 0.2 --width 38 --dpi 130 "\ f"--outFileName {outfile.name}".split() @@ -419,3 +439,19 @@ def test_bedgraph_neg_log1p(): os.remove(ini_file) os.remove(os.path.join(ROOT, "bedgraph_chrx_2e6_5e6_m.bg")) + + +def test_bedgraph_end_not_covered(): + region = "chr7:100-400" + outfile = NamedTemporaryFile(suffix='.png', prefix='bedgraph_end_not_covered_', delete=False) + args = "--tracks {ini} --region {region} --trackLabelFraction 0.2 " \ + "--dpi 130 --outFileName {outfile}" \ + "".format(ini=os.path.join(ROOT, "bedgraph_end_not_covered.ini"), + outfile=outfile.name, region=region).split() + pygenometracks.plotTracks.main(args) + print("saving test to {}".format(outfile.name)) + res = compare_images(os.path.join(ROOT, 'master_bedgraph_end_not_covered.png'), + outfile.name, tolerance) + assert res 
is None, res + + os.remove(outfile.name) diff --git a/pygenometracks/tests/test_data/bedgraph_end_not_covered.ini b/pygenometracks/tests/test_data/bedgraph_end_not_covered.ini new file mode 100644 index 00000000..fbaa6f10 --- /dev/null +++ b/pygenometracks/tests/test_data/bedgraph_end_not_covered.ini @@ -0,0 +1,7 @@ + +[bedgraph] +file = simple.bdg +height = 3 +summary_method = max + +[x-axis] diff --git a/pygenometracks/tests/test_data/master_bedgraph_end_not_covered.png b/pygenometracks/tests/test_data/master_bedgraph_end_not_covered.png new file mode 100644 index 00000000..81facbd7 Binary files /dev/null and b/pygenometracks/tests/test_data/master_bedgraph_end_not_covered.png differ diff --git a/pygenometracks/tests/test_data/simple.bdg b/pygenometracks/tests/test_data/simple.bdg new file mode 100644 index 00000000..bdb29041 --- /dev/null +++ b/pygenometracks/tests/test_data/simple.bdg @@ -0,0 +1,6 @@ +track type=bedGraph name="400-404notIncluded" +chr7 100 200 1 +chr7 200 300 2 +chr7 300 350 3 +chr7 350 399 4 +chr7 405 450 5 diff --git a/pygenometracks/tests/test_hiCMatrixTracks.py b/pygenometracks/tests/test_hiCMatrixTracks.py index 1ab0c7e3..d5fc43bd 100644 --- a/pygenometracks/tests/test_hiCMatrixTracks.py +++ b/pygenometracks/tests/test_hiCMatrixTracks.py @@ -468,13 +468,19 @@ def test_plot_tracks_with_hic_rasterize_height_2chr(): output_file = outfile.name[:-4] + '_' + region_str + extension expected_file = os.path.join(ROOT, 'master_plot_hic_rasterize_height_' + region_str + extension) + # matplotlib compare on pdf will create a png next to it. 
+ # To avoid issues related to write in test_data folder + # We copy the expected file into a temporary place + new_expected_file = NamedTemporaryFile(suffix='.pdf', + prefix='pyGenomeTracks_test_', + delete=False) + os.system(f'cp {expected_file} {new_expected_file.name}') + expected_file = new_expected_file.name res = compare_images(expected_file, output_file, tolerance) assert res is None, res os.remove(output_file) - if extension == '.pdf': - os.remove(expected_file.replace(extension, '_pdf.png')) def test_plot_tracks_with_hic_rasterize_height_2chr_individual(): @@ -485,15 +491,20 @@ def test_plot_tracks_with_hic_rasterize_height_2chr_individual(): delete=False) expected_file = os.path.join(ROOT, 'master_plot_hic_rasterize_height_' + region.replace(':', '-') + extension) - + # matplotlib compare on pdf will create a png next to it. + # To avoid issues related to write in test_data folder + # We copy the expected file into a temporary place + new_expected_file = NamedTemporaryFile(suffix='.pdf', + prefix='pyGenomeTracks_test_', + delete=False) + os.system(f'cp {expected_file} {new_expected_file.name}') + expected_file = new_expected_file.name args = f"--tracks {ini_file} --region {region} "\ "--trackLabelFraction 0.23 --width 38 --dpi 10 "\ f"--outFileName {outfile.name}".split() pygenometracks.plotTracks.main(args) res = compare_images(expected_file, outfile.name, tolerance) - if extension == '.pdf': - os.remove(expected_file.replace(extension, '_pdf.png')) assert res is None, res diff --git a/pygenometracks/tracks/BedGraphTrack.py b/pygenometracks/tracks/BedGraphTrack.py index e426cebd..46b5e496 100644 --- a/pygenometracks/tracks/BedGraphTrack.py +++ b/pygenometracks/tracks/BedGraphTrack.py @@ -140,15 +140,13 @@ def __init__(self, properties_dict): " requires to set the parameter" " second_file.") else: - if self.properties['second_file'].endswith(".bgz"): + # First try to open it as a Tabix file + try: # from the tabix file is not possible to know the # 
global min and max - try: - self.tbx2 = pysam.TabixFile(self.properties['second_file']) - except IOError: - self.interval_tree2, __, __ = file_to_intervaltree(self.properties['second_file']) - # load the file as an interval tree - else: + self.tbx2 = pysam.TabixFile(self.properties['second_file']) + except IOError: + # load the file as an interval tree self.interval_tree2, __, __ = file_to_intervaltree(self.properties['second_file']) def set_properties_defaults(self): @@ -183,17 +181,13 @@ def set_properties_defaults(self): def load_file(self): self.tbx = None - # try to load a tabix file is available - if self.properties['file'].endswith(".bgz"): + # try to load a tabix file if available + try: # from the tabix file is not possible to know the # global min and max - try: - self.tbx = pysam.TabixFile(self.properties['file']) - except IOError: - self.interval_tree, __, __ = file_to_intervaltree(self.properties['file'], - self.properties['region']) - # load the file as an interval tree - else: + self.tbx = pysam.TabixFile(self.properties['file']) + except IOError: + # load the file as an interval tree self.interval_tree, __, __ = file_to_intervaltree(self.properties['file'], self.properties['region']) @@ -235,7 +229,7 @@ def get_scores(self, chrom_region, start_region, end_region, return_nans=True, tbx_var='self.tbx', inttree_var='self.interval_tree'): """ Retrieves the score (or scores or whatever fields are in a bedgraph like file) and the positions - for a given region. + for a given region. If return_nans is True the pos_list goes until at least end_region. In case there is no item in the region. 
It returns [], [] Args: chrom_region: @@ -243,7 +237,7 @@ def get_scores(self, chrom_region, start_region, end_region, end_region: Returns: tuple: - scores_list, post_list + scores_list, pos_list """ score_list = [] pos_list = [] @@ -294,6 +288,11 @@ def get_scores(self, chrom_region, start_region, end_region, score_list.append(values) pos_list.append((start, end)) + # Add a last value if needed: + if prev_end < end_region and return_nans: + score_list.append(np.repeat(np.nan, self.num_fields)) + pos_list.append((prev_end, end_region)) + return score_list, pos_list def plot(self, ax, chrom_region, start_region, end_region): diff --git a/pygenometracks/tracks/HiCMatrixTrack.py b/pygenometracks/tracks/HiCMatrixTrack.py index 5b4a2f42..aa80aa39 100644 --- a/pygenometracks/tracks/HiCMatrixTrack.py +++ b/pygenometracks/tracks/HiCMatrixTrack.py @@ -155,7 +155,7 @@ def set_properties_defaults(self): else: return # We need to get the size before masking bins because - # HiCMatrix v13 give smaller chromosome_sizes after: + # HiCMatrix>=v13 give smaller chromosome_sizes after: self.chrom_sizes = self.hic_ma.get_chromosome_sizes() if self.properties['show_masked_bins']: pass diff --git a/pygenometracks/utilities.py b/pygenometracks/utilities.py index e9df2c8e..abde263c 100644 --- a/pygenometracks/utilities.py +++ b/pygenometracks/utilities.py @@ -7,6 +7,13 @@ import pybedtools import tempfile import warnings +import logging + + +FORMAT = "[%(levelname)s:%(filename)s:%(lineno)s - %(funcName)20s()] %(message)s" +logging.basicConfig(format=FORMAT) +log = logging.getLogger(__name__) +log.setLevel(logging.DEBUG) class InputError(Exception): @@ -86,6 +93,14 @@ def temp_file_from_intersect(file_name, plot_regions=None, around_region=0): file_to_open = original_file.intersect(regions, wa=True, u=True).fn except pybedtools.helpers.BEDToolsError: file_to_open = file_name + except NotImplementedError: + log.warning("BEDTools is not installed pygenometracks" + " will be slower.") + 
file_to_open = file_name + except Exception as e: + log.warning(f"BEDTools intersect raised: {e}" + "\nWill not subset the file.") + file_to_open = file_name sys.stderr.close() sys.stderr = sys.__stderr__ with open(temporary_file.name, 'r') as f: @@ -94,9 +109,9 @@ def temp_file_from_intersect(file_name, plot_regions=None, around_region=0): error_lines = [line for line in temp_std_error if 'error' in line.lower()] if len(error_lines) > 0: error_lines_printable = '\n'.join(error_lines) - sys.stderr.write("Bedtools intersect raised an error:\n" - f"{error_lines_printable}\n" - "Will not use bedtools.\n") + log.warning("BEDTools intersect raised an error:\n" + f"{error_lines_printable}\n" + "Will not use BEDTools.\n") file_to_open = file_name return file_to_open @@ -171,7 +186,11 @@ def file_to_intervaltree(file_name, plot_regions=None): valid_intervals += 1 if valid_intervals == 0: - sys.stderr.write(f"No valid intervals were found in file {file_name}") + if file_to_open != file_name: + suffix = " after intersection with the plotted region" + else: + suffix = "" + log.warning(f"No valid intervals were found in file {file_name}{suffix}") file_h.close() return interval_tree, min_value, max_value diff --git a/requirements.txt b/requirements.txt index 92c2f103..23241406 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,9 +3,9 @@ matplotlib ==3.1.1 intervaltree >=2.1.0 pybigwig >=0.3.16 future >=0.17.0 -hicmatrix >=13 +hicmatrix >=15 pysam >=0.14 gffutils >=0.9 pybedtools >=0.8.1 tqdm >=4.20 -libopenblas < 0.3.10 + diff --git a/setup.py b/setup.py index b6758cfb..fd438fa2 100644 --- a/setup.py +++ b/setup.py @@ -95,7 +95,7 @@ def checkProgramIsInstalled(self, program, args, where_to_download, "intervaltree >=2.1.0", "pyBigWig >=0.3.16", "future >=0.17.0", - "hicmatrix >=13", + "hicmatrix >=15", "pysam >=0.14", "pytest", "gffutils >=0.9",