From 7dcd9b28e33c66155cff49bdf710ed745391033c Mon Sep 17 00:00:00 2001 From: Loncova Date: Wed, 15 Dec 2021 17:44:01 +0100 Subject: [PATCH 1/5] first commit --- modules/preprocess.R | 1 + 1 file changed, 1 insertion(+) create mode 100644 modules/preprocess.R diff --git a/modules/preprocess.R b/modules/preprocess.R new file mode 100644 index 0000000..3c24513 --- /dev/null +++ b/modules/preprocess.R @@ -0,0 +1 @@ +# code for preprocessing of the samplesheets \ No newline at end of file From 0cb1444f370043f9faff27097367cb7f1d1c07d3 Mon Sep 17 00:00:00 2001 From: Loncova Date: Wed, 15 Dec 2021 18:09:42 +0100 Subject: [PATCH 2/5] first commit trial --- modules/preprocess.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/preprocess.R b/modules/preprocess.R index 3c24513..b9e41de 100644 --- a/modules/preprocess.R +++ b/modules/preprocess.R @@ -1 +1,3 @@ -# code for preprocessing of the samplesheets \ No newline at end of file +# code for preprocessing of the samplesheets + +# some lines of code \ No newline at end of file From bc46059bb6268f7ba29da5cd5eedb8222c9c1667 Mon Sep 17 00:00:00 2001 From: Loncova Date: Fri, 21 Jan 2022 10:44:20 +0100 Subject: [PATCH 3/5] code first version some description --- modules/preprocess.R | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/modules/preprocess.R b/modules/preprocess.R index b9e41de..41c6511 100644 --- a/modules/preprocess.R +++ b/modules/preprocess.R @@ -1,3 +1,27 @@ # code for preprocessing of the samplesheets -# some lines of code \ No newline at end of file +# At the very beginning of the pipeline, we need to verify that the samplesheet makes sense, and sanitize it. +# For instance, the samplesheet used by nf-core/rnaseq can contain multiple rows for the same sample +# (in case multiple fastq files need to be merged). + +# Input: samplesheet as provided by user +# Output: cleaned samplesheet with one row per sample. + +library (dplyr) + +# load input files, e.g. test data + +sampleAnnotation <- read_csv("https://raw.githubusercontent.com/icbi-lab/nf-deseq2/main/tests/testdata/example_nfcore/rnaseq_samplesheet_nfcore-3.1.csv") +geneCounts <- read_tsv("https://raw.githubusercontent.com/icbi-lab/nf-deseq2/main/tests/testdata/example_nfcore/salmon.merged.gene_counts.subset.tsv") + +### notes: +### check gene counts for column names with samples +### take it and filter samplesheets based on that - i.e. output samplesheet will have only 6 lines in total, only those, that are different + +cond_col = "group" +# sample_col = NULL +contrast = c("group", "grpA", "grpB") + +sampleAnnotation2 <- filter(sampleAnnotation, get(cond_col) %in% contrast[2:3]) + +#### end end end From 33de3e2d66e6958d8fb918f4d0b0d04f9bf657b5 Mon Sep 17 00:00:00 2001 From: Loncova Date: Fri, 21 Jan 2022 16:43:01 +0100 Subject: [PATCH 4/5] samplesheet preprocessing first trial --- modules/preprocess.R | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/modules/preprocess.R b/modules/preprocess.R index 41c6511..db10929 100644 --- a/modules/preprocess.R +++ b/modules/preprocess.R @@ -11,17 +11,19 @@ library (dplyr) # load input files, e.g. test data -sampleAnnotation <- read_csv("https://raw.githubusercontent.com/icbi-lab/nf-deseq2/main/tests/testdata/example_nfcore/rnaseq_samplesheet_nfcore-3.1.csv") -geneCounts <- read_tsv("https://raw.githubusercontent.com/icbi-lab/nf-deseq2/main/tests/testdata/example_nfcore/salmon.merged.gene_counts.subset.tsv") +sampleAnnotation <- read_csv("https://raw.githubusercontent.com/icbi-lab/nf-deseq2/main/tests/testdata/example_nfcore/rnaseq_samplesheet_nfcore-3.1.csv", show_col_types = FALSE) +geneCounts <- read_tsv("https://raw.githubusercontent.com/icbi-lab/nf-deseq2/main/tests/testdata/example_nfcore/salmon.merged.gene_counts.subset.tsv", show_col_types = F) -### notes: -### check gene counts for column names with samples -### take it and filter samplesheets based on that - i.e. output samplesheet will have only 6 lines in total, only those, that are different +sampleAnnotation2 <- sampleAnnotation %>% + select(-fastq_1, -fastq_2) %>% + distinct() # filter distinct rows -cond_col = "group" -# sample_col = NULL -contrast = c("group", "grpA", "grpB") +# another approach +geneCounts2 <- select(geneCounts, -gene_id, -gene_name) %>% + colnames() +outputSheet <- filter(sampleAnnotation, sample==geneCounts2) # this preserves two more columns -sampleAnnotation2 <- filter(sampleAnnotation, get(cond_col) %in% contrast[2:3]) -#### end end end +#output cleaned samplesheet +print(sampleAnnotation2) +print(outputSheet) From 1a0bdd851da5a9cb2b84004647b6a468b8698163 Mon Sep 17 00:00:00 2001 From: Loncova Date: Fri, 21 Jan 2022 16:45:36 +0100 Subject: [PATCH 5/5] samplesheet preprocessing first trial --- modules/preprocess.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/preprocess.R b/modules/preprocess.R index db10929..21afe3a 100644 --- a/modules/preprocess.R +++ b/modules/preprocess.R @@ -24,6 +24,6 @@ geneCounts2 <- select(geneCounts, -gene_id, -gene_name) %>% outputSheet <- filter(sampleAnnotation, sample==geneCounts2) # this preserves two more columns -#output cleaned samplesheet +# output cleaned samplesheets print(sampleAnnotation2) print(outputSheet)