diff --git a/mRNAData/mRNADataSetsCLI.py b/mRNAData/mRNADataSetsCLI.py
index fa0caea9..9e8a1eab 100644
--- a/mRNAData/mRNADataSetsCLI.py
+++ b/mRNAData/mRNADataSetsCLI.py
@@ -19,10 +19,10 @@ def main():
         dat = cptac.Brca()
     elif opts.type.lower() == 'ccrcc':
         dat = cptac.Ccrcc()
-    elif opts.type.lower() == 'colon':
-        dat = cptac.Colon()
-    elif opts.type.lower() == 'endometrial':
-        dat = cptac.Endometrial()
+    elif opts.type.lower() == 'coad':
+        dat = cptac.Coad()
+    elif opts.type.lower() == 'ucec':
+        dat = cptac.Ucec()
     elif opts.type.lower() == 'gbm':
         dat = cptac.Gbm()
     elif opts.type.lower() == 'hnscc':
@@ -32,7 +32,9 @@ def main():
     elif opts.type.lower() == 'luad':
         dat = cptac.Luad()
     elif opts.type.lower() == 'ovarian':
-        dat = cptac.Ovarian()
+        dat = cptac.Ov()
+    elif opts.type.loewr() == 'pdac':
+        dat = cptac.Pdac()        
     else:
         exit()
     df = dat.get_transcriptomics()
diff --git a/metrics/compare-matrices/compareSigMatrices.py b/metrics/compare-matrices/compareSigMatrices.py
index 5abdebb9..14f2dc24 100644
--- a/metrics/compare-matrices/compareSigMatrices.py
+++ b/metrics/compare-matrices/compareSigMatrices.py
@@ -19,10 +19,17 @@ def get_cor_stat(dat, prot_markers):
     the correlation of the entitites for a subset of proteins
     '''
 
+    # we have to add data source
+    sources = dat.list_data_sources()
+    psource = [row['Available sources'].split(',')[0] for index,row in sources.iterrows() if row['Data type']=='proteomics']
+    tsource = [row['Available sources'].split(',')[0] for index,row in sources.iterrows() if row['Data type']=='transcriptomics']
+    genes = [p for p in prot_markers['NAME']]
     #get the merged data frame from cptac data
     merged = dat.join_omics_to_omics(df1_name='proteomics', \
                                      df2_name='transcriptomics', \
-                                     genes1=prot_markers['NAME'], genes2=prot_markers['NAME'])
+                                     df1_source=psource[0],\
+                                     df2_source=tsource[0],\
+                                     genes1=genes, genes2=genes)
     if merged.columns.nlevels == 2:
         merged.columns = merged.columns.droplevel(1)
 
@@ -32,16 +39,18 @@ def get_cor_stat(dat, prot_markers):
     prots['Name'] = prots.index
     prots = prots.melt(id_vars="Name", var_name='id', value_name='prot')
     prots['Gene'] = prots['id'].str.split('_', expand=True)[0]
-
+    prots = prots.drop('id',axis=1)
+    
     trans = merged.loc[:, merged.columns.str.contains('transcriptomics')]
     trans['Name'] = trans.index
     trans = trans.melt(id_vars="Name", var_name='id', value_name='trans')
     trans['Gene'] = trans['id'].str.split('_', expand=True)[0]
+    trans=trans.drop('id',axis=1)
 
     #remerge the proteins and transcripts and compute spearman rank correlation
     both_df = pandas.merge(prots, trans, how='inner', left_on=['Name', 'Gene'],\
                            right_on=['Name', 'Gene'])
-    corvals = both_df[['Name', 'Gene', 'prot', 'trans']].groupby('Gene').corr('spearman').unstack()['trans']['prot']
+    corvals = both_df[['Gene', 'prot', 'trans']].groupby('Gene').corr('spearman').unstack()['trans']['prot']
     corvals = pandas.DataFrame(corvals)
     corvals["NAME"] = corvals.index
     #print(corvals.head())
@@ -57,15 +66,24 @@ def get_na_counts(dat, prot_markers):
     we also want to see how many values are missing between mRNA and protein
 
     '''
+    # we have to add data source
+    sources = dat.list_data_sources()
+    psource = [row['Available sources'].split(',')[0] for index,row in sources.iterrows() if row['Data type']=='proteomics']
+    tsource = [row['Available sources'].split(',')[0] for index,row in sources.iterrows() if row['Data type']=='transcriptomics']
+    genes = [p for p in prot_markers['NAME']]
+    #get the merged data frame from cptac data
     merged = dat.join_omics_to_omics(df1_name='proteomics', \
                                      df2_name='transcriptomics', \
-                                     genes1=prot_markers['NAME'], genes2=prot_markers['NAME'])
+                                     df1_source=psource[0],\
+                                     df2_source=tsource[0],\
+                                     genes1=genes, genes2=genes)
+
     if merged.columns.nlevels == 2:
         merged.columns = merged.columns.droplevel(1)
 
     sums = merged.isna().sum()/merged.shape[0]
     inds = sums.index.str.split('_', expand=False)
-    sumtab = pandas.DataFrame({'NAs':sums, 'NAME': [i[0] for i in inds], 'Data': [i[1] for i in inds]})
+    sumtab = pandas.DataFrame({'NAs':sums, 'NAME': [i[0] for i in inds], 'Data': [i[2] for i in inds]})
 
     counts = sumtab.merge(prot_markers).groupby(['cell_type', 'Data']).mean("NAs")
     return counts
@@ -96,25 +114,29 @@ def main():
     ##now we get the cptac datasets from the package
     dslist = {}
     countlist = {}
-    for ds in ['brca', 'ccrcc', 'endometrial', 'colon',
-               'ovarian', 'luad', 'hnscc', 'gbm']:
-        cptac.download(dataset=ds)
+    for ds in ['brca', 'ccrcc', 'ucec', 'coad','pdac',
+               'ovarian', 'luad', 'hnscc', 'gbm','lscc']:
+        #cptac.download(dataset=ds)
         if ds == 'brca':
             dat = cptac.Brca()
         elif ds == 'ccrcc':
             dat = cptac.Ccrcc()
-        elif ds == 'colon':
-            dat = cptac.Colon()
+        elif ds == 'coad':
+            dat = cptac.Coad()
         elif ds == 'ovarian':
-            dat = cptac.Ovarian()
-        elif ds == 'endometrial':
-            dat = cptac.Endometrial()
+            dat = cptac.Ov()
+        elif ds == 'ucec':
+            dat = cptac.Ucec()
         elif ds == 'hnscc':
             dat = cptac.Hnscc()
         elif ds == 'luad':
             dat = cptac.Luad()
         elif ds == 'gbm':
             dat = cptac.Gbm()
+        elif ds == 'pdac':
+            dat = cptac.Pdac()
+        elif ds == 'lscc':
+            dat = cptac.Lscc()
         else:
             exit()
             ##now get the data, create one large data frame
@@ -124,11 +146,15 @@ def main():
         countlist[ds] = get_na_counts(dat, prot_markers)
         countlist[ds]['Tumor'] = ds
 
+
+    mval= os.path.splitext(os.path.basename(args.matrix))[0]
     #then plot cor values by cell type and cancer type
     fulltab = pandas.concat([dslist[d] for d in dslist.keys()])
     fulltab['Tumor'] = fulltab.Tumor.astype('category')
     fulltab['cell_type'] = fulltab.cell_type.astype('category')
-    fulltab.to_csv(os.path.splitext(os.path.basename(args.matrix))[0]+'_allCors.csv')
+    fulltab['sigMatrix'] = mval
+    fulltab.to_csv(mval+'_allCors.csv')
+
 
     ##now we can plot the statistics using the plotnine pacakge
     print(fulltab.head())
@@ -140,7 +166,7 @@ def main():
         + ylab("Spearman rank correlation")
         + theme(axis_text_x=element_text(rotation=90, hjust=1))
     )
-    fname = os.path.splitext(os.path.basename(args.matrix))[0]+'_sigMatrix_box_and_points.pdf'
+    fname = mval+'_sigMatrix_box_and_points.pdf'
     plot.save(fname)
 
     plot = (
@@ -150,13 +176,14 @@ def main():
         + ylab("Spearman rank correlation")
         + theme(axis_text_x=element_text(rotation=90, hjust=1))
         )
-    fname = os.path.splitext(os.path.basename(args.matrix))[0]+'_sigMatrix_boxplot.pdf'
+    fname = mval+'_sigMatrix_boxplot.pdf'
     plot.save(fname, height=8, width=10)
 
     counttab = pandas.concat([countlist[d] for d in countlist.keys()])
+    counttab['SigMatrix']=mval
     print(counttab.head())
-    counttab.to_csv(os.path.splitext(os.path.basename(args.matrix))[0]+'_allNAs.csv')
-    counttab = pandas.read_csv(os.path.splitext(os.path.basename(args.matrix))[0]+'_allNAs.csv')
+    counttab.to_csv(mval+'_allNAs.csv')
+    counttab = pandas.read_csv(mval+'_allNAs.csv')
     plot = (
         ggplot(counttab, aes(x='cell_type', fill='Tumor', y='NAs'))
         + geom_bar(stat='identity', position='dodge')
@@ -165,7 +192,16 @@ def main():
         + ylab("Fraction missing data")
         + theme(axis_text_x=element_text(rotation=90, hjust=1))
     )
-    fname = os.path.splitext(os.path.basename(args.matrix))[0]+'_nacounts.pdf'
+    fname = mval+'_nacounts_bar.pdf'
+    plot.save(fname)
+    plot = (
+        ggplot(counttab, aes(x='cell_type', fill='Data', y='NAs'))
+        + geom_boxplot(position='dodge')
+        + xlab('Cell Type')
+        + ylab("Fraction missing data")
+        + theme(axis_text_x=element_text(rotation=90, hjust=1))
+    )
+    fname = mval+'_nacounts_box.pdf'
     plot.save(fname)
 
 
diff --git a/metrics/data-sim/runSamplingManually.py b/metrics/data-sim/runSamplingManually.py
new file mode 100644
index 00000000..0b18672e
--- /dev/null
+++ b/metrics/data-sim/runSamplingManually.py
@@ -0,0 +1,42 @@
+'''
+run sampling manually to avoid having to loop in CWL, whcih turned out to be cumberson
+
+'''
+
+import os
+
+filelist = []
+for i in [0,1,2,3,4]:
+    estring = 'cwltool simul-data-sampling.cwl --prot-sigs LM9 --simType prot --repNumber '+str(i)
+    print(estring)
+    os.system(estring)
+    fname='combined-cellType-correlation-'+str(i)+'.tsv'
+    os.rename(fname,'prot-LM9-'+fname)
+    filelist.append('prot-LM9-'+fname)
+
+    estring = 'cwltool simul-data-sampling.cwl --prot-sigs LM7c --simType prot --repNumber '+str(i)
+    print(estring)
+    os.system(estring)
+    fname='combined-cellType-correlation-'+str(i)+'.tsv'
+    os.rename(fname,'prot-LM7c-'+fname)
+    filelist.append('prot-LM7c-'+fname)
+    
+    estring = 'cwltool simul-data-sampling.cwl --rna-sigs LM22 --simType mrna --repNumber '+str(i)
+    print(estring)
+    os.system(estring)
+    fname='combined-cellType-correlation-'+str(i)+'.tsv'
+    os.rename(fname,'mrna-LM22-'+fname)
+    filelist.append('mrna-LM22-'+fname)
+
+    estring = 'cwltool simul-data-sampling.cwl --rna-sigs PBMC --simType mrna --repNumber '+str(i)
+    print(estring)
+    os.system(estring)
+    fname='combined-cellType-correlation-'+str(i)+'.tsv'
+    os.rename(fname,'mrna-pbmc-'+fname)
+    filelist.append('mrna-pbmc-'+fname)
+
+
+
+
+    
+    
diff --git a/metrics/data-sim/simul-data-comparison.cwl b/metrics/data-sim/simul-data-comparison.cwl
index bef96d24..4f3edfa0 100644
--- a/metrics/data-sim/simul-data-comparison.cwl
+++ b/metrics/data-sim/simul-data-comparison.cwl
@@ -33,9 +33,12 @@ inputs:
       default: ['LM7c','LM9']
    simType: 
       type: string
-   sample:
+   sample:  #how much do want to sample the matrix? 
       type: int
       default: 100
+   repNumber: ##if you wanted to run this numer times
+      type: int
+      defalut 0
 
 outputs:
    cell-cor-tab:
diff --git a/metrics/data-sim/simul-data-sampling.cwl b/metrics/data-sim/simul-data-sampling.cwl
index fab7e704..f0420f2e 100644
--- a/metrics/data-sim/simul-data-sampling.cwl
+++ b/metrics/data-sim/simul-data-sampling.cwl
@@ -17,19 +17,19 @@ inputs:
 #      default: 10
    samples:
       type: int[]
-      default: [20,40,80]#[10,20,40,60,80,100]
+      default: [10,20,40,60,80,100]
    mrna-perms:
       type: string[]
-      default: ['1','2','3','4','5','6','7','8','9','10','pbmc']
+      default: ['1','2','3','4','5']#,'6','7','8','9','10','pbmc']
    prot-perms:
       type: string[]
-      default: ['1','2','3']##,'4','5']
+      default: ['1','2','3','4','5']
    prot-algorithms:
       type: string[]
       default:
       - mcpcounter
-    #  - xcell
-    #  - epic
+      - xcell
+      - epic
       - cibersort
    rna-sigs: 
       type: string[]
@@ -39,6 +39,9 @@ inputs:
       default: ['LM7c','LM9']
    simType: 
       type: string
+   repNumber:
+      type: int
+      default: 0
 
 outputs:
    cell-cor-tab:
@@ -50,7 +53,7 @@ outputs:
 
 steps:
    run-all-algs-by-mrna:
-     run: sim-loop.cwl #call-deconv-on-sim.cwl
+     run: call-deconv-on-sim.cwl #call-deconv-on-sim.cwl
      when: $(inputs.simType.trim() == 'mrna')
      scatter: [protAlg,simulation,signature,sample]
      scatterMethod: flat_crossproduct
@@ -59,15 +62,16 @@ steps:
         simulation: mrna-perms
         signature: rna-sigs
         sample: samples
+        sampleRep: repNumber
         sampleType:
           valueFrom: 'normal'
         dataType:
           valueFrom: 'prot'
         simType: simType
      out:
-       [cell-cor-files]
+       [cell-cor-file]
    run-all-algs-by-prot:
-     run: sim-loop.cwl #call-deconv-on-sim.cwl
+     run: call-deconv-on-sim.cwl
      when: $(inputs.simType.trim() == 'prot')
      scatter: [protAlg,simulation,signature,sample]
      scatterMethod: flat_crossproduct
@@ -75,6 +79,7 @@ steps:
         protAlg: prot-algorithms
         simulation: prot-perms
         sample: samples
+        sampleRep: repNumber
         signature: prot-sigs
         sampleType:
           valueFrom: 'normal'
@@ -82,56 +87,19 @@ steps:
           valueFrom: 'prot'
         simType: simType
      out:
-        [cell-cor-files]
-   arrayBusiness:
-     run:
-      class: ExpressionTool
-      inputs:
-        arrayTwoDim:
-          type:
-            type: array
-            items:
-              type: array
-              items: File
-          inputBinding:
-            loadContents: true
-      outputs:
-         array1d:
-          type: File[]
-      expression: >
-          ${
-             var newArray= [];
-             for (var i = 0; i < inputs.arrayTwoDim.length; i++) {
-                for (var k = 0; k < inputs.arrayTwoDim[i].length; k++) {
-                   newArray.push((inputs.arrayTwoDim[i])[k]);
-            }
-          }
-          return { 'array1d' : newArray }
-          }
-     in:
-       arrayTwoDim:
-        source:
-           - run-all-algs-by-mrna/cell-cor-files
-           - run-all-algs-by-prot/cell-cor-files
-        linkMerge: merge_flattened
-        pickValue: all_non_null
-     out: [array1d]
+        [cell-cor-file]
    get-celltype-cors:
       run: ../figures/plot-figs.cwl
       in:
         metricType:
             valueFrom: "cellType"
         files:
-            source: arrayBusiness/array1d
-        #    type:
-        #      type: array
-        #      items:
-        #         type: array
-        #         items: File
-        #    source:
-        #      - run-all-algs-by-mrna/cell-cor-files
-        #      - run-all-algs-by-prot/cell-cor-files
-        #    linkMerge: merge_flattened
-        #    pickValue: all_non_null
+            source:
+              - run-all-algs-by-mrna/cell-cor-file
+              - run-all-algs-by-prot/cell-cor-file
+            linkMerge: merge_flattened
+            pickValue: all_non_null
+        repNumber:
+            source: repNumber
       out:
          [table,fig]
diff --git a/metrics/figures/combine_results.R b/metrics/figures/combine_results.R
index 910033bf..8ff07dda 100644
--- a/metrics/figures/combine_results.R
+++ b/metrics/figures/combine_results.R
@@ -5,11 +5,11 @@
 #'
 #'
 #' to run:
-#' Rscript  combine_results.R --metricType sample --metric correlation */*corr.tsv
-#' Rscript  combine_results.R --metricType cellType --metric correlation  */*corrXcelltypes.tsv
-#' Rscript  combine_results.R --metricType sample --metric meanCorrelation */*corr.tsv
-#' Rscript  combine_results.R --metricType cellType --metric meanCorrelation  */*corrXcelltypes.tsv
-#' Rscript  combine_results.R --metricType js --metric distance */*dist.tsv
+#' Rscript  combine_results.R --metricType sample --metric correlation --repNumber 0 */*corr.tsv
+#' Rscript  combine_results.R --metricType cellType --metric correlation  --repNumber 0  */*corrXcelltypes.tsv
+#' Rscript  combine_results.R --metricType sample --metric meanCorrelation  --repNumber 0 */*corr.tsv
+#' Rscript  combine_results.R --metricType cellType --metric meanCorrelation  --repNumber 0  */*corrXcelltypes.tsv
+#' Rscript  combine_results.R --metricType js --metric distance  --repNumber 0  */*dist.tsv
 
 library(dplyr)
 library(argparser)
@@ -305,25 +305,26 @@ main<-function(){
 
   ##todo: store in synapse
   argv <- commandArgs(trailingOnly = TRUE)
-  file.list<-argv[3:length(argv)]
+  file.list<-argv[4:length(argv)]
   metric=argv[2]
   metricType = argv[1]
+  repNumber = argv[3]
   if(metric=='correlation' && metricType=='sample'){
     tab<-combinePatientCors(file.list,metric)
     print(dim(tab))
-    write.table(tab,paste0('combined-', metricType, '-', metric,'.tsv'),row.names=F,col.names=T)
+    write.table(tab,paste0('combined-', metricType, '-', metric,'-',repNumber,'.tsv'),row.names=F,col.names=T)
   }else if(metric=='correlation' && metricType=='cellType'){
     tab<-combineCellTypeCors(file.list,metric)
     print(dim(tab))
-    write.table(tab,paste0('combined-', metricType, '-', metric,'.tsv'),row.names=F,col.names=T)
+    write.table(tab,paste0('combined-', metricType, '-', metric,'-',repNumber,'.tsv'),row.names=F,col.names=T)
   } else if (metric=='meanCorrelation'){
     tab<-combineCorsMean(file.list,metric, metricType)
     print(dim(tab))
-    write.table(tab,paste0('combined-', metricType, '-', metric,'.tsv'),row.names=F,col.names=T)
+    write.table(tab,paste0('combined-', metricType, '-', metric,'-',repNumber,'.tsv'),row.names=F,col.names=T)
   }else if(metric=='distance'){
     tab<-combineDists(file.list,metric,metricType)
     print(dim(tab))
-    write.table(tab,paste0('combined-', metricType, '-', metric,'.tsv'),row.names=F,col.names=T)
+    write.table(tab,paste0('combined-', metricType, '-', metric,'-',repNumber,'.tsv'),row.names=F,col.names=T)
   }else{
     print("First argument must be metricType and second must be metric name")
 
diff --git a/metrics/figures/plot-figs.cwl b/metrics/figures/plot-figs.cwl
index 98148d34..48380319 100644
--- a/metrics/figures/plot-figs.cwl
+++ b/metrics/figures/plot-figs.cwl
@@ -26,6 +26,10 @@ inputs:
        position: 2
    files:
      type: File[]
+     inputBinding:
+        position: 4
+   repNumber:
+     type: int
      inputBinding:
         position: 3
 
diff --git a/protData/protDataSetsCLI.py b/protData/protDataSetsCLI.py
index 586f37ee..ecdc21dd 100644
--- a/protData/protDataSetsCLI.py
+++ b/protData/protDataSetsCLI.py
@@ -20,10 +20,10 @@ def main():
         dat = cptac.Brca()
     elif opts.type.lower() == 'ccrcc':
         dat = cptac.Ccrcc()
-    elif opts.type.lower() == 'colon':
-        dat = cptac.Colon()
-    elif opts.type.lower() == 'endometrial':
-        dat = cptac.Endometrial()
+    elif opts.type.lower() == 'coad':
+        dat = cptac.Coad()
+    elif opts.type.lower() == 'ucec':
+        dat = cptac.Ucec()
     elif opts.type.lower() == 'gbm':
         dat = cptac.Gbm()
     elif opts.type.lower() == 'hnscc':
@@ -33,7 +33,9 @@ def main():
     elif opts.type.lower() == 'luad':
         dat = cptac.Luad()
     elif opts.type.lower() == 'ovarian':
-        dat = cptac.Ovarian()
+        dat = cptac.Ov()
+    elif opts.type.loewr() == 'pdac':
+        dat = cptac.Pdac()
     else:
         exit()
     df = dat.get_proteomics()
diff --git a/signature_matrices/getSigMatrices.R b/signature_matrices/getSigMatrices.R
index 21db6724..db4dc80c 100644
--- a/signature_matrices/getSigMatrices.R
+++ b/signature_matrices/getSigMatrices.R
@@ -7,8 +7,8 @@ main<-function(){
     argv <- commandArgs(trailingOnly = TRUE)
     sig_name <- trimws(argv[1])
 
-    sample <-as.numeric(trimws(argv[2]))
-    print(sample)    
+    sampval <-as.numeric(trimws(argv[2]))
+    print(samval)    
     #reshape matrisome to adhere to standards
     if(tolower(sig_name)=='matrisome'){
       tab <- readxl::read_xlsx('/Hs_Matrisome_Masterlist_Naba et al_2012.xlsx')[,c(2:3)]|>
@@ -25,10 +25,10 @@ main<-function(){
     tab <- read.table(paste0('/',sig_name,'.txt'),header=T,sep='\t',row.names=1,check.names=F)
     num<-nrow(tab)
     prots<-rownames(tab)
-    subsamp<-sample(prots,size=round(length(prots)*sample/100),replace=FALSE)
+    subsamp<-sample(prots,size=round(length(prots)*sampval/100),replace=FALSE)
     print(paste('reducing signature matrix from',nrow(tab),'to',length(subsamp)))
     tab<-tab[subsamp,]
-    sig_name <- paste0(sig_name,'_',sample)
+    sig_name <- paste0(sig_name,'_',sampval)
     write.table(tab,paste0('./',sig_name,'.txt'),row.names=T,sep='\t',quote=F)
 
 }
diff --git a/simulatedData/mapSimDataMatrices.R b/simulatedData/mapSimDataMatrices.R
index 0920758d..9c4eac9c 100644
--- a/simulatedData/mapSimDataMatrices.R
+++ b/simulatedData/mapSimDataMatrices.R
@@ -8,6 +8,7 @@ main <- function(){
     print(argv)
     deconv.mat <- read.table(argv[1],header=T,row.names=1,sep='\t', check.names = F) #matrix to be fixed
     sig.mat <- argv[2] #signature used to deconvolve matrix
+    sig.mat <-stringr::str_replace(sig.mat,'_[0-9]*','') ##added this due to sampling changes!!!
     sim.type <- tolower(argv[3]) #type of simulation performed
     cell.type <-read.table(argv[4],header=T,row.names=1,sep='\t', check.names = F)