-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #306 from sanger-tol/main
Main to Dev
- Loading branch information
Showing
18 changed files
with
104 additions
and
254 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,29 +1,82 @@ | ||
#!/bin/bash | ||
cram_path=$1 | ||
chunkn=0 | ||
for cram in ${cram_path}/*.cram; do | ||
rgline=$(samtools view -H $cram|grep "RG"|sed 's/\t/\\t/g'|sed "s/'//g") | ||
|
||
crampath=$(readlink -f ${cram}) | ||
# generate_cram_csv.sh | ||
# ------------------- | ||
# Generate a csv file describing the CRAM folder | ||
# ><((((°> Y ><((((°> U ><((((°> M ><((((°> I ><((((°> | ||
# Author = yy5 | ||
# ><((((°> Y ><((((°> U ><((((°> M ><((((°> I ><((((°> | ||
|
||
ncontainers=$(zcat ${crampath}.crai|wc -l) | ||
base=$(basename $cram .cram) | ||
# Function to process chunking of a CRAM file | ||
chunk_cram() { | ||
local cram=$1 | ||
local chunkn=$2 | ||
local outcsv=$3 | ||
realcram=$(readlink -f ${cram}) | ||
realcrai=$(readlink -f ${cram}.crai) | ||
local rgline=$(samtools view -H "${realcram}" | grep "@RG" | sed 's/\t/\\t/g' | sed "s/'//g") | ||
local ncontainers=$(zcat "${realcrai}" | wc -l) | ||
local base=$(basename "${realcram}" .cram) | ||
local from=0 | ||
local to=10000 | ||
|
||
from=0 | ||
to=10000 | ||
|
||
|
||
while [ $to -lt $ncontainers ] | ||
do | ||
echo $crampath,${crampath}.crai,${from},${to},${base},${chunkn},${rgline} | ||
from=$((to+1)) | ||
((to+=10000)) | ||
while [ $to -lt $ncontainers ]; do | ||
echo "${realcram},${realcrai},${from},${to},${base},${chunkn},${rgline}" >> $outcsv | ||
from=$((to + 1)) | ||
((to += 10000)) | ||
((chunkn++)) | ||
done | ||
|
||
if [ $from -le $ncontainers ] | ||
then | ||
echo $crampath,${crampath}.crai,${from},${ncontainers},${base},${chunkn},${rgline} | ||
if [ $from -le $ncontainers ]; then | ||
echo "${realcram},${realcrai},${from},${ncontainers},${base},${chunkn},${rgline}" >> $outcsv | ||
((chunkn++)) | ||
fi | ||
|
||
echo $chunkn | ||
} | ||
|
||
# Function to process a CRAM file | ||
process_cram_file() { | ||
local cram=$1 | ||
local chunkn=$2 | ||
local outcsv=$3 | ||
|
||
local read_groups=$(samtools view -H "$cram" | grep '@RG' | awk '{for(i=1;i<=NF;i++){if($i ~ /^ID:/){print substr($i,4)}}}') | ||
local num_read_groups=$(echo "$read_groups" | wc -w) | ||
|
||
if [ "$num_read_groups" -gt 1 ]; then | ||
# Multiple read groups: process each separately | ||
for rg in $read_groups; do | ||
local output_cram="$(basename "${cram%.cram}")_output_${rg}.cram" | ||
samtools view -h -r "$rg" -o "$output_cram" "$cram" | ||
samtools index "$output_cram" | ||
chunkn=$(chunk_cram "$output_cram" "$chunkn" "$outcsv") | ||
done | ||
else | ||
# Single read group or no read groups | ||
chunkn=$(chunk_cram "$cram" "$chunkn" "$outcsv") | ||
fi | ||
|
||
echo $chunkn | ||
} | ||
|
||
# /\_/\ /\_/\ | ||
# ( o.o ) main ( o.o ) | ||
# > ^ < > ^ < | ||
|
||
# Check if cram_path is provided | ||
if [ -z "$1" ]; then | ||
echo "Usage: $0 <cram_path>" | ||
exit 1 | ||
fi | ||
|
||
cram_path=$1 | ||
chunkn=0 | ||
outcsv=$2 | ||
|
||
# Loop through each CRAM file in the specified directory. cram cannot be the synlinked cram | ||
for cram in ${cram_path}/*.cram; do | ||
realcram=$(readlink -f $cram) | ||
chunkn=$(process_cram_file $realcram $chunkn $outcsv) | ||
done |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -49,4 +49,3 @@ process SELFCOMP_SPLITFASTA { | |
END_VERSIONS | ||
""" | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.