From 356898e28beee0ee396737cd7bfdccded950dc5a Mon Sep 17 00:00:00 2001 From: Lee Katz Date: Mon, 2 Dec 2024 09:41:43 -0500 Subject: [PATCH] Update emails (#72) * ignore more perl libs and test files * update saveFailedGenomes to work with new fastq metrics * remove featureio * html multipart email * table sorted with footer * bump version to 0.27.1 * updated base64 * bump versions --------- Co-authored-by: edlb-sneakernet --- .gitignore | 10 ++ SneakerNet.plugins/emailWhoever.pl | 157 ++++++++++++++++++--- SneakerNet.plugins/sn_saveFailedGenomes.pl | 38 +++-- lib/perl5/SneakerNet.pm | 2 +- 4 files changed, 165 insertions(+), 42 deletions(-) diff --git a/.gitignore b/.gitignore index f451c8d..94075a1 100644 --- a/.gitignore +++ b/.gitignore @@ -34,18 +34,28 @@ lib/perl5/Try lib/perl5/x86_64-linux-thread-multi lib/perl5/GD lib/perl5/perl5 +lib/perl5/App +lib/perl5/Statistics +lib/perl5/TAP +lib/perl5/Tree +lib/perl5/XML t/M00123-18-001-test/SneakerNet/ t/M00123-18-001-test/readMetrics.tsv t/M00123-18-001-test/samples.tsv +t/M00123-18-001-asm t/M00123-18-001-test/contaminated_1.fastq.gz t/M00123-18-001-test/contaminated_2.fastq.gz t/kraken-database +t/bioprojectSurveillance* db/* db/fasta/* +/env +/findModules.pl + bin/bdf2gdfont.pl man diff --git a/SneakerNet.plugins/emailWhoever.pl b/SneakerNet.plugins/emailWhoever.pl index 37408b2..383ed58 100755 --- a/SneakerNet.plugins/emailWhoever.pl +++ b/SneakerNet.plugins/emailWhoever.pl @@ -16,13 +16,15 @@ use POSIX qw/strftime/; use IO::Compress::Zip qw(zip $ZipError); +use MIME::Base64; + $ENV{PATH}="$ENV{PATH}:/opt/cg_pipeline/scripts"; use Config::Simple; use SneakerNet qw/exitOnSomeSneakernetOptions recordProperties readConfig passfail command logmsg version/; use List::MoreUtils qw/uniq/; -our $VERSION = "3.4"; +our $VERSION = "3.7"; our $CITATION= "Email whoever by Lee Katz"; my $snVersion=version(); @@ -159,35 +161,63 @@ sub emailWhoever{ logmsg "To: $to"; my $from=$$settings{from} || die "ERROR: need to set 'from' in the settings.conf file!"; my $subject="$runName QC"; - my $body ="Please see below for QC information on $runName.\n\n"; + + my $body ="
\n"; + $body.="Please see below for QC information on $runName.\n\n"; $body.="For more details, please see the other attachments.\n"; - $body.=" - TSV files can be opened in Excel\n"; - $body.=" - LOG files can be opened in Wordpad, Notepad++, or VSCode\n"; - $body.=" - HTML files can be opened in Edge\n"; - $body.=" - Full path: ".realpath($dir)."/SneakerNet\n"; + $body.="
    \n"; + $body.="
  • TSV files can be opened in Excel
  • \n"; + $body.="
  • LOG files can be opened in Wordpad, Notepad++, or VSCode
  • \n"; + $body.="
  • HTML files can be opened in Edge
  • \n"; + $body.="
  • Full path: ".realpath($dir)."/SneakerNet
  • \n"; + $body.="
\n"; $body.="\nThis message was brought to you by SneakerNet v$snVersion!\n"; - $body.="Documentation can be found at https://github.com/lskatz/SneakerNet\n"; + $body.="

Documentation can be found at https://github.com/lskatz/SneakerNet

\n"; + $body.="
\n"; # Failure messages in the body - $body.="\nAny samples that have failed QC as shown in passfail.tsv are listed below.\n"; + $body.="
\n"; + $body.="Any samples that have failed QC as shown in passfail.tsv are listed below.\n"; + $body.="
    \n"; for my $fastq(keys(%$failure)){ my $failureMessage=""; for my $failureCategory(keys(%{$$failure{$fastq}})){ if($$failure{$fastq}{$failureCategory} == 1){ - $failureMessage.=$fastq."\n"; + $failureMessage.="
  • $fastq
  • \n"; last; # just list a given failed fastq once } } $body.=$failureMessage; } + $body.="
\n"; + + $body = tsvToHtml("$dir/SneakerNet/forEmail/QC_summary.tsv", $settings); + $body .= "

\n"; + $body .= "This message was brought to you by SneakerNet v$snVersion!\n"; + $body .= "Documentation can be found at github.com/lskatz/SneakerNet.\n"; + $body .= "

\n"; + + + # https://stackoverflow.com/a/11725308 + my $mailpart = generate_uuid(); + my $mailpart_body = generate_uuid(); my $emailFile = "$$settings{tempdir}/email.txt"; open(my $fh, ">", $emailFile) or die "ERROR: could not write to $emailFile: $!"; print $fh "To: $to\n"; print $fh "From: $from\n"; print $fh "Subject: $subject\n"; + print $fh "MIME-Version: 1.0\n"; + print $fh "Content-Type: multipart/mixed; boundary=\"$mailpart\"\n"; + print $fh "\n"; + print $fh "--$mailpart\n"; + print $fh "Content-Type: multipart/alternative; boundary=\"$mailpart_body\"\n"; print $fh "\n"; + print $fh "--$mailpart_body\n"; + print $fh "Content-Type: text/html; charset=\"utf-8\"\n"; + print $fh "Content-Disposition: inline\n"; print $fh "$body\n"; + print $fh "--$mailpart_body--\n"; # Save a list of files to be attached my @attachment; @@ -208,7 +238,7 @@ sub emailWhoever{ my @finalAttachment; for my $file(@attachment){ if(-s $file > 1e7){ - logmsg "NOTE: $file is too big. I will not attach it."; + logmsg "WARNING: $file is too big. I will not attach it."; } else { push(@finalAttachment, $file); } @@ -226,7 +256,7 @@ sub emailWhoever{ # Finally, attach the files for my $file(@finalAttachment){ - append_attachment($fh, $file); + append_attachment($fh, $file, $mailpart); } close $fh; @@ -240,6 +270,92 @@ sub emailWhoever{ # Utility subs # ################ +sub generate_uuid { + my @chars = ('a'..'f', 0..9); + my $uuid = ''; + + $uuid .= $chars[rand @chars] for 1..8; + $uuid .= '-'; + $uuid .= $chars[rand @chars] for 1..4; + $uuid .= '-'; + $uuid .= $chars[rand @chars] for 1..4; + $uuid .= '-'; + $uuid .= $chars[rand @chars] for 1..4; + $uuid .= '-'; + $uuid .= $chars[rand @chars] for 1..12; + + return $uuid; +} + +# Transform a tsv file into an html string +sub tsvToHtml{ + my($tsv, $settings) = @_; + + my $html; + + my @footer; + + $html .= "\n"; + + $html .= ""; + + my @evenOddBackground = ('#EEE','#CCC'); + + # Read the table and divvy it up into header, body, footer + my(@body, $footer); + open(my $fh, "<", $tsv) or die "ERROR: could not read $tsv: $!"; + my $header = <$fh>; + chomp($header); + my @header = split(/\t/, lc($header)); + while(my $line = <$fh>){ + chomp($line); + my @F = split(/\t/, $line); + my %F; + @F{@header} = @F; + if($line =~ /^#/){ + $line =~ s/^#\s*//; + push(@footer, $line); + } else { + push(@body, \%F); + } + } + close $fh; + + # Sort the body + @body = sort{ + $$a{score} <=> $$b{score} || + $$a{sample} cmp $$b{sample} + } @body; + + $html .= "\n"; + $html .= "\n"; + $html .= "\n"; + for my $hash(@body){ + # Background color is determined by running the line number mod number of colors + my $background = $evenOddBackground[$. % scalar(@evenOddBackground)]; + + $html .= "\n"; + for my $h(@header){ + $html .= " \n"; + } + $html .= "\n"; + } + $html .= "
" . join("", @header) . "
$$hash{$h}
\n"; + + # Footer lines + if(@footer){ + $html .= "\n"; + } + + $html .= "\n"; + + return $html; +} + # http://stackoverflow.com/a/20359734 sub flatten { map { ref $_ ? flatten(@{$_}) : $_ } @_; @@ -289,22 +405,21 @@ sub zip_file { # Add an attachment to an email file handle sub append_attachment { - my ($fh, $file_path) = @_; + my ($fh, $file_path, $separator) = @_; # Encode the attachment content using base64 encoding my $attachment_name = basename($file_path); - - open(my $attachment_fh, "<", $file_path) or die "Failed to open attachment file $file_path: $!"; - binmode $attachment_fh; - my $attachment_content = do { local $/; <$attachment_fh> }; - close $attachment_fh; - - my $encoded_content = pack("u", $attachment_content); + my $attachment_ext = $attachment_name; + $attachment_ext =~ s/.+\.//; + my $encoded_content = encode_base64(`cat $file_path`); die "Failed to encode attachment content from $file_path: $!" if $?; - print $fh "begin 644 $attachment_name\n"; + print $fh "--$separator\n"; + print $fh "Content-Type: application/$attachment_ext; name=\"$attachment_name\"\n"; + print $fh "Content-Transfer-Encoding: base64\n"; + print $fh "Content-Disposition: attachment; filename=\"$attachment_name\"\n"; + print $fh "\n"; print $fh $encoded_content . "\n"; - print $fh "end\n"; # Print a newline to separate MIME parts print $fh "\n"; diff --git a/SneakerNet.plugins/sn_saveFailedGenomes.pl b/SneakerNet.plugins/sn_saveFailedGenomes.pl index 0294453..4849a4b 100755 --- a/SneakerNet.plugins/sn_saveFailedGenomes.pl +++ b/SneakerNet.plugins/sn_saveFailedGenomes.pl @@ -9,13 +9,12 @@ use File::Temp qw/tempdir/; use File::Copy qw/mv cp/; use Bio::SeqIO; -use Bio::FeatureIO::gff; use FindBin; use lib "$FindBin::RealBin/../lib/perl5"; use SneakerNet qw/exitOnSomeSneakernetOptions recordProperties readConfig samplesheetInfo_tsv command logmsg fullPathToExec/; -our $VERSION = "1.4.1"; +our $VERSION = "1.5.0"; our $CITATION = "Save failed genomes by Lee Katz"; # For any warnings in the SN report @@ -77,8 +76,7 @@ sub saveGenomes{ chomp; my %F; @F{@header}=split(/\t/,$_); - $F{File} = basename($F{File}); - $readMetrics{$F{File}} = \%F; + $readMetrics{$F{Sample}} = \%F; } close READMETRICS; @@ -89,26 +87,23 @@ sub saveGenomes{ # Get the name of all files linked to this file through the sample. $$sampleInfo{$samplename}{fastq}//=[]; # Set {fastq} to an empty list if it does not exist - my @file=@{$$sampleInfo{$samplename}{fastq}}; - for my $fastq(@file){ - my $fastqMetrics = $readMetrics{basename($fastq)}; - - # Coverage - if($$fastqMetrics{coverage} eq '.'){ # dot means coverage is unknown - $totalCoverage = -1; # -1 means 'unknown' coverage - logmsg "$fastq unknown coverage" if($$settings{debug}); - } else { - $$fastqMetrics{coverage} ||= 0; # force it to be a number if it isn't already - $totalCoverage += $$fastqMetrics{coverage}; - logmsg "Sample $samplename += $$fastqMetrics{coverage}x => ${totalCoverage}x" if($$settings{debug}); - } + + my $fastqMetrics = $readMetrics{$samplename}; + + if($$fastqMetrics{coverage} eq '.'){ # dot means coverage is unknown + $totalCoverage = -1; # -1 means 'unknown' coverage + logmsg "$samplename unknown coverage" if($$settings{debug}); + } else { + $$fastqMetrics{coverage} ||= 0; # force it to be a number if it isn't already + $totalCoverage = $$fastqMetrics{coverage}; + logmsg "Sample $samplename = ${totalCoverage}x" if($$settings{debug}); } if( $totalCoverage >= $$settings{coverage} && $totalCoverage < $$sampleInfo{$samplename}{taxonRules}{coverage} ){ - $saved{$samplename} = \@file; + $saved{$samplename} = $$sampleInfo{$samplename}{fastq} } } @@ -142,11 +137,14 @@ sub rsync{ my $subfolder = "$$sample{taxonRules}{dest_subfolder}/QC_fails"; my $fileString = join(" ", @{$$sample{fastq}}); - my $command = "rsync -av -q --no-motd -av --no-g --copy-links --chmod=Du=rwx,Dg=rx,Do=rx,Fu=rw,Fg=r,Fo=r $fileString $$settings{transfer_destination_string}/$subfolder/"; + my $command = "rsync -e 'ssh -q' -av --no-motd --no-g --copy-links --chmod=Du=rwx,Dg=rx,Do=rx,Fu=rw,Fg=r,Fo=r $fileString $$settings{transfer_destination_string}/$subfolder/"; if($$settings{debug}){ logmsg "COMMAND: $command"; } else { - command("rsync -q --no-motd -av --no-g --copy-links --chmod=Du=rwx,Dg=rx,Do=rx,Fu=rw,Fg=r,Fo=r $fileString $$settings{transfer_destination_string}/$subfolder/"); + system($command); + if($?){ + die "ERROR: could not rsync $fileString ==> $$settings{transfer_destination_string}/$subfolder/: $!"; + } } return 1; diff --git a/lib/perl5/SneakerNet.pm b/lib/perl5/SneakerNet.pm index e630f24..51d7c9a 100644 --- a/lib/perl5/SneakerNet.pm +++ b/lib/perl5/SneakerNet.pm @@ -36,7 +36,7 @@ TODO =cut -our $VERSION = version->declare('0.27.0'); +our $VERSION = version->declare('0.27.2'); our %rankName = (S=>'species', G=>'genus', F=>'family', O=>'order', C=>'class', P=>'phylum', K=>'kingdom', D=>'domain', U=>'unclassified'); our @rankOrder= qw(S G F O C P K D U); our %rankOrder= (S=>0, G=>1, F=>2, O=>3, C=>4, P=>5, K=>6, D=>7, U=>8);