diff --git a/config/config.yaml b/config/config.yaml
index c4a9a01..17c4d44 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -6,7 +6,7 @@ User:
   output_dir: /path/to/output_dir
   sample_map: /path/to/sample_map.tsv
   cancer_cell_type: HGSOC
-
+  
 # Change if you use a custom reference
 Reference:
   genome: /../ref/GRCh38_gencode_v44_CTAT_lib_Oct292023.plug-n-play/ctat_genome_lib_build_dir/ref_genome.fa
@@ -78,7 +78,7 @@ SNVCalling:
     Min_cell_types: 2
     min_distance: 0
     max_gnomAD_VAF: 0.01
-    deltaVAF: 0.1
+    deltaVAF: 0.05
     deltaMCF: 0.3
     min_ac_reads: 3
     min_ac_cells: 2
@@ -116,7 +116,7 @@ CellClust:
     FN: -1
     estimator: 'posterior'
     pp: [1,1]
-    dpa: [1,1]
+    dpa: [0.001, 5.0]
 
  ### CNA Calling 
 inferCNV:
diff --git a/profile/config.yaml b/profile/config.yaml
index 1924afa..b0be342 100644
--- a/profile/config.yaml
+++ b/profile/config.yaml
@@ -5,7 +5,7 @@ jobs: 500
 cores: 500
 
 default-resources:
-  mem_mb_per_cpu: max(2*input.size_mb, 4096)
+  mem_mb_per_cpu: max(2*input.size_mb, 8192)
   runtime: 240
   slurm_account: "'es_beere'"
   #tmpdir: /cluster/scratch/
diff --git a/run_LongSom.sh b/run_LongSom.sh
index 23da61a..a7ad97c 100755
--- a/run_LongSom.sh
+++ b/run_LongSom.sh
@@ -3,7 +3,7 @@ OUTPUT_DIR=/path/to/output_dir
 REF_DIR=path/to/LongSom/ref/GRCh38_gencode_v44_CTAT_lib_Oct292023.plug-n-play/ctat_genome_lib_build_dir
 
 snakemake \
-  -s workflow/LongSom.smk \
+  -s workflow/Snakefile \
   --configfile config/config.yaml \
   --use-conda \
   --use-singularity \
diff --git a/run_LongSom_slurm.sh b/run_LongSom_slurm.sh
index 0a49dfd..f5e0e61 100755
--- a/run_LongSom_slurm.sh
+++ b/run_LongSom_slurm.sh
@@ -10,7 +10,7 @@ sbatch \
   -e logs/snakelog.err \
 snakemake \
   -s workflow/Snakefile \
-  --configfile workflow/test.yaml \
+  --configfile config/config.yaml \
   --profile profile/ \
   --use-conda \
   --use-singularity \
diff --git a/workflow/rules/CellClustering.smk b/workflow/rules/CellClustering.smk
index 8878404..97a33cd 100644
--- a/workflow/rules/CellClustering.smk
+++ b/workflow/rules/CellClustering.smk
@@ -57,7 +57,7 @@ else:
             tsv="SNVCalling/BaseCellCalling/{id}.calling.step3.tsv",
             bam=f"{INPUT}/bam/{{id}}.bam",
             barcodes="CellTypeReannotation/ReannotatedCellTypes/{id}.tsv",
-            fusions="FusionCalling/Somatic/{id}.Fusions.tsv" if CTATFUSION else [],
+            fusions="FusionCalling/Somatic/{id}.Fusions.SingleCellGenotype.tsv" if CTATFUSION else [],
             ref=str(workflow.basedir)+config['Reference']['genome'],
         output:
             tsv="CellClustering/SingleCellGenotype/{id}.SingleCellGenotype.tsv",
@@ -106,11 +106,11 @@ rule FormatInputBnpC:
     input:
         bin="CellClustering/SingleCellGenotype/{id}.BinaryMatrix.tsv",
         vaf="CellClustering/SingleCellGenotype/{id}.VAFMatrix.tsv",
-        ctypes="CellTypeReannotation/ReannotatedCellTypes/{id}.tsv",
+        barcodes="CellTypeReannotation/ReannotatedCellTypes/{id}.tsv",
     output:
         bin="CellClustering/BnpC_input/{id}.BinaryMatrix.tsv",
         vaf="CellClustering/BnpC_input/{id}.VAFMatrix.tsv",
-        ctypes="CellClustering/BnpC_input/{id}.Barcodes.tsv",
+        barcodes="CellClustering/BnpC_input/{id}.Barcodes.tsv",
     params:
         script=str(workflow.basedir)+"/scripts/CellClustering/FormatInputBnpC.py",
         min_cells=config['CellClust']['FormatInput']['min_cells_per_mut'],
@@ -126,7 +126,7 @@ rule FormatInputBnpC:
         python {params.script} \
         --bin {input.bin} \
         --vaf {input.vaf} \
-        --ctypes {input.ctypes} \
+        --barcodes {input.barcodes} \
         --min_pos_cov {params.min_cov} \
         --min_cells_per_mut {params.min_cells} \
         --outfile CellClustering/BnpC_input//{wildcards.id} 
@@ -136,7 +136,7 @@ rule BnpC_clustering:
     input:
         bin="CellClustering/BnpC_input/{id}.BinaryMatrix.tsv",
         vaf="CellClustering/BnpC_input/{id}.VAFMatrix.tsv",
-        ctypes="CellClustering/BnpC_input/{id}.Barcodes.tsv",
+        barcodes="CellClustering/BnpC_input/{id}.Barcodes.tsv",
     output:
         pdf="CellClustering/BnpC_output/{id}/genoCluster_posterior_mean_raw.pdf"
     params:
@@ -172,5 +172,5 @@ rule BnpC_clustering:
         -FN {params.FN} \
         -pp {params.pp} \
         -ap {params.dpa} \
-        --ctypes {input.ctypes} 
+        --barcodes {input.barcodes} 
         """
diff --git a/workflow/scripts/CellClustering/FormatInputBnpC.py b/workflow/scripts/CellClustering/FormatInputBnpC.py
index 8571228..9901e65 100644
--- a/workflow/scripts/CellClustering/FormatInputBnpC.py
+++ b/workflow/scripts/CellClustering/FormatInputBnpC.py
@@ -1,14 +1,12 @@
 import timeit
 import argparse
 import pandas as pd
-import matplotlib
-import matplotlib.pyplot as plt
 import numpy as np
 
-def filter_input(bin,vaf,ctypes,min_cells_per_mut,min_pos_cov,out_prefix):
+def filter_input(bin,vaf,barcodes,min_cells_per_mut,min_pos_cov,out_prefix):
 	bin = pd.read_csv(bin,sep='\t',index_col=0,na_values=[3,'.'])
 	vaf = pd.read_csv(vaf,sep='\t',index_col=0,na_values=[3,'.'])
-	ctypes = pd.read_csv(ctypes,sep='\t')
+	barcodes = pd.read_csv(barcodes,sep='\t')
 	#Save fusions:
 	fusions = [i for i in bin.index if '--' in i]
 	fusions_save = bin.loc[fusions,bin.columns]
@@ -25,19 +23,22 @@ def filter_input(bin,vaf,ctypes,min_cells_per_mut,min_pos_cov,out_prefix):
 	
 	# Filter all input files:
 	vaf = vaf.loc[idx,cols]
-	ctypes = ctypes[ctypes['Index'].isin(cols)]
+	barcodes = barcodes[barcodes['Index'].isin(cols)]
 	bin = pd.concat([bin,fusions_save[cols]])
+
+	# Add reanno ctype colors
+	barcodes['Cell_Reanno_Colors'] = barcodes['Reannotated_cell_type'].apply(lambda x: '#94C773' if x=='Non-Cancer' else '#8F79A1')
 	
 	# Write
 	bin.to_csv(out_prefix + '.BinaryMatrix.tsv', sep='\t')
 	vaf.to_csv(out_prefix + '.VAFMatrix.tsv', sep='\t')
-	ctypes.to_csv(out_prefix + '.Barcodes.tsv', sep='\t', index = False)
+	barcodes.to_csv(out_prefix + '.Barcodes.tsv', sep='\t', index = False)
 
 def initialize_parser():
 	parser = argparse.ArgumentParser(description='Script to filter BnpC input matrix')
 	parser.add_argument('--bin', type=str, default=1, help='SComatic binary matrix (obtained by SingleCellGenotype.py)', required = True)
 	parser.add_argument('--vaf', type=str, default=1, help='SComatic VAF matrix (obtained by SingleCellGenotype.py)', required = True)
-	parser.add_argument('--ctypes', type=str, default=1, help='Barcode to celltypes (obtained by CellTypeReannotation.py)', required = True)
+	parser.add_argument('--barcodes', type=str, default=1, help='Barcode to celltypes (obtained by CellTypeReannotation.py)', required = True)
 	parser.add_argument('--min_cells_per_mut', type=int, default=5, help='SComatic+CellTypeReannotation base calling file (obtained by BaseCellCalling.step3.py)', required = False)
 	parser.add_argument('--min_pos_cov', type=int, default=3, help='SComatic+CellTypeReannotation base calling file (obtained by BaseCellCalling.step3.py)', required = False)
 	parser.add_argument('--outfile', default = 'Matrix.tsv', help='Out file', required = False)
@@ -51,7 +52,7 @@ def main():
 
 	bin = args.bin
 	vaf = args.vaf
-	ctypes = args.ctypes
+	barcodes = args.barcodes
 	min_cells_per_mut = args.min_cells_per_mut
 	min_pos_cov = args.min_pos_cov
 	out_prefix = args.outfile
@@ -60,7 +61,7 @@ def main():
 	print("Outfile prefix: " , out_prefix ,  "\n") 
 
 	# 1. Create clinical annotation file
-	filter_input(bin,vaf,ctypes,min_cells_per_mut,min_pos_cov,out_prefix)
+	filter_input(bin,vaf,barcodes,min_cells_per_mut,min_pos_cov,out_prefix)
 
 if __name__ == '__main__':
 	start = timeit.default_timer()
diff --git a/workflow/scripts/CellClustering/libs/__pycache__/CRP.cpython-312.pyc b/workflow/scripts/CellClustering/libs/__pycache__/CRP.cpython-312.pyc
new file mode 100644
index 0000000..b8fb63d
Binary files /dev/null and b/workflow/scripts/CellClustering/libs/__pycache__/CRP.cpython-312.pyc differ
diff --git a/workflow/scripts/CellClustering/libs/__pycache__/CRP_learning_errors.cpython-312.pyc b/workflow/scripts/CellClustering/libs/__pycache__/CRP_learning_errors.cpython-312.pyc
new file mode 100644
index 0000000..77221b2
Binary files /dev/null and b/workflow/scripts/CellClustering/libs/__pycache__/CRP_learning_errors.cpython-312.pyc differ
diff --git a/workflow/scripts/CellClustering/libs/__pycache__/MCMC.cpython-312.pyc b/workflow/scripts/CellClustering/libs/__pycache__/MCMC.cpython-312.pyc
new file mode 100644
index 0000000..3f4e6ee
Binary files /dev/null and b/workflow/scripts/CellClustering/libs/__pycache__/MCMC.cpython-312.pyc differ
diff --git a/workflow/scripts/CellClustering/libs/__pycache__/__init__.cpython-312.pyc b/workflow/scripts/CellClustering/libs/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000..8e0b037
Binary files /dev/null and b/workflow/scripts/CellClustering/libs/__pycache__/__init__.cpython-312.pyc differ
diff --git a/workflow/scripts/CellClustering/libs/__pycache__/dpmmIO.cpython-312.pyc b/workflow/scripts/CellClustering/libs/__pycache__/dpmmIO.cpython-312.pyc
new file mode 100644
index 0000000..3a1ba53
Binary files /dev/null and b/workflow/scripts/CellClustering/libs/__pycache__/dpmmIO.cpython-312.pyc differ
diff --git a/workflow/scripts/CellClustering/libs/__pycache__/plotting.cpython-312.pyc b/workflow/scripts/CellClustering/libs/__pycache__/plotting.cpython-312.pyc
new file mode 100644
index 0000000..7b7671c
Binary files /dev/null and b/workflow/scripts/CellClustering/libs/__pycache__/plotting.cpython-312.pyc differ
diff --git a/workflow/scripts/CellClustering/libs/__pycache__/utils.cpython-312.pyc b/workflow/scripts/CellClustering/libs/__pycache__/utils.cpython-312.pyc
new file mode 100644
index 0000000..2623f9c
Binary files /dev/null and b/workflow/scripts/CellClustering/libs/__pycache__/utils.cpython-312.pyc differ
diff --git a/workflow/scripts/CellClustering/libs/dpmmIO.py b/workflow/scripts/CellClustering/libs/dpmmIO.py
index 5a39c03..a594934 100644
--- a/workflow/scripts/CellClustering/libs/dpmmIO.py
+++ b/workflow/scripts/CellClustering/libs/dpmmIO.py
@@ -275,7 +275,7 @@ def save_similarity(args, inferred, results, out_dir):
 
 
 def save_geno_plots(args, data, data_raw, out_dir, names):
-    ctypes = pd.read_csv(args.ctypes, sep='\t')
+    barcodes = pd.read_csv(args.barcodes, sep='\t')
     row_cl = False
     if args.mut_order:
         row_cl = list(pd.read_csv(args.mut_order, sep='\t')['INDEX'])
@@ -289,11 +289,11 @@ def save_geno_plots(args, data, data_raw, out_dir, names):
             df_obs = pd.DataFrame(data_raw, index=names[0], columns=names[1]).T
             pl.plot_raw_data(
                 data_est['genotypes'], df_obs, assignment=data_est['assignment'],
-                ctypes=ctypes, out_file=out_file, row_cl=row_cl
+                barcodes=barcodes, out_file=out_file, row_cl=row_cl
             )
             pl.plot_raw_data(
                 df_obs, df_obs, assignment=data_est['assignment'],
-                ctypes=ctypes, out_file=out_file_raw, row_cl=row_cl
+                barcodes=barcodes, out_file=out_file_raw, row_cl=row_cl
             )
 
 
diff --git a/workflow/scripts/CellClustering/libs/plotting.py b/workflow/scripts/CellClustering/libs/plotting.py
index fb2a958..888e72b 100644
--- a/workflow/scripts/CellClustering/libs/plotting.py
+++ b/workflow/scripts/CellClustering/libs/plotting.py
@@ -64,7 +64,7 @@ def _get_col_order(assignment):
 
 def plot_raw_data(data_in, data_raw_in=pd.DataFrame(), out_file=None,
             assignment=np.array([]), metric='correlation', row_cl=True,
-            ctypes=pd.Series()):
+            barcodes=pd.Series()):
 
     data = data_in.copy()
     data_raw = data_raw_in.copy()
@@ -93,7 +93,7 @@ def plot_raw_data(data_in, data_raw_in=pd.DataFrame(), out_file=None,
                 
         cluster_cols = pd.Series(col_dict, name='clusters', index=col_order)
 
-        ctypes = ctypes.reindex([ctypes.index[i] for i in col_order])
+        barcodes = barcodes.reindex([barcodes.index[i] for i in col_order])
 
         data.columns = np.arange(data_in.shape[1])
         data = data[col_order]
@@ -137,7 +137,7 @@ def plot_raw_data(data_in, data_raw_in=pd.DataFrame(), out_file=None,
     cmap.set_over('green')
     cmap.set_bad('grey')
 
-    col_colors=[ctypes['Cancer_Color'],cluster_cols]
+    col_colors=[barcodes['Cell_Reanno_Colors'],cluster_cols]
 
     cm = sns.clustermap(
         data,
diff --git a/workflow/scripts/CellClustering/run_BnpC.py b/workflow/scripts/CellClustering/run_BnpC.py
index be7df58..35bca1d 100644
--- a/workflow/scripts/CellClustering/run_BnpC.py
+++ b/workflow/scripts/CellClustering/run_BnpC.py
@@ -55,7 +55,7 @@ def check_PSRF_cutoff(val):
         help='Run single chain in main python thread for debugging with pdb.'
     )
     parser.add_argument(
-        '--ctypes', type=str,
+        '--barcodes', type=str,
         help='Absolute or relative path(s) to input barcode-to-ctype file'
     )
     parser.add_argument(
diff --git a/workflow/scripts/CellTypeReannotation/HighConfidenceCancerVariants.py b/workflow/scripts/CellTypeReannotation/HighConfidenceCancerVariants.py
index 4233580..95c283b 100644
--- a/workflow/scripts/CellTypeReannotation/HighConfidenceCancerVariants.py
+++ b/workflow/scripts/CellTypeReannotation/HighConfidenceCancerVariants.py
@@ -46,9 +46,9 @@ def HCCV_SNV(SNVs,outfile,min_dp,deltaVAF,deltaMCF,clust_dist):
 	input_df['DP_FILTER'] = input_df.apply(lambda x: 
 		DP_filtering(x['Cancer'],x['Non-Cancer'],min_dp), axis=1)
 	input_df = input_df[input_df['DP_FILTER']=='PASS']
+	input_df.to_csv(outfile+'2', sep='\t', index=False,  mode='a')
 
 	#Special case for chrM due to contaminants:
-
 	# Save chrM candidate SNVs to apply specific filters
 	chrm_df = input_df[input_df['#CHROM']=='chrM'].copy()
 	input_df = input_df[input_df['#CHROM']!='chrM']
@@ -77,6 +77,7 @@ def HCCV_SNV(SNVs,outfile,min_dp,deltaVAF,deltaMCF,clust_dist):
 	# Filter 7: PoN filterDelta VAF and MCF filtering
 	input_df['HCCV_FILTER'] = input_df.apply(lambda x: 
 		MCF_filtering(x['Cell_types'],x['VAF'], x['MCF'],deltaVAF,deltaMCF), axis=1)
+	input_df.to_csv(outfile+'3', sep='\t', index=False,  mode='a')
 	input_df = input_df[input_df['HCCV_FILTER']=='PASS']
 
 	# Filter 8: Distance filter
@@ -229,16 +230,19 @@ def MCF_filtering(CTYPES,VAF,MCF,deltaVAFmin,deltaMCFmin):
 			VAFNonCancer = float(VAFs[0])
 			MCFCancer = float(MCFs[1])
 			MCFNonCancer = float(MCFs[0])
-
-		if VAFNonCancer>0.12:
-			return 'Heterozygous'
 		
 		if VAFCancer<0.05: 
 			return 'NonSig'
 
-		# deltaVAF = VAFCancer-VAFNonCancer
+		deltaVAF = VAFCancer-VAFNonCancer
 		deltaMCF = MCFCancer-MCFNonCancer
 
+		if VAFNonCancer>0.1 and deltaVAF<2*deltaVAFmin:
+			return 'Heterozygous'
+		
+		if VAFNonCancer>0.2:
+			return 'Heterozygous'
+
 		# HCCV are variants with high VAF/MCF in cancer and low VAF/MCF in non-cancer cells
 		# if deltaVAF < deltaVAFmin:
 		#	return 'LowDeltaVAF'
diff --git a/workflow/scripts/SNVCalling/BaseCellCalling.step3.py b/workflow/scripts/SNVCalling/BaseCellCalling.step3.py
index 6363220..505014a 100644
--- a/workflow/scripts/SNVCalling/BaseCellCalling.step3.py
+++ b/workflow/scripts/SNVCalling/BaseCellCalling.step3.py
@@ -30,23 +30,21 @@ def variant_calling_step3(file,out_prefix,deltaVAF,deltaMCF,chrM_conta,min_ac_re
 	f3.write('##INFO=FINAL_FILTER,Description=Final ilter status, including chrM contaminants and clustered sites\n')	
 	f2.close()
 	f3.close()
-
 	
 	input_df = pd.read_csv(file, sep='\t',comment='#',names=output_column_names)
-	input_df['INDEX'] = input_df['#CHROM'].astype(str) + ':' + input_df['Start'].astype(str) + ':' + input_df['ALT'].str.split(',', n=1, expand=True)[0]
-	
-	# Only keeping sites with alternative reads in cancer
+
+	# Only keeping sites called in cancer
 	input_df = input_df[input_df['Cell_types']!='Non-Cancer']
 
 	# Filtering multiallelic sites, keeping only one allele if it's 50x larger than the other
 	# Otherwise deleting the line (it messes with filters later on)
 	# This is important as SComatic will often detect low expr secondary alt allele in very high expr. (e.g. in chrM)
-	input_df[['ALT', 'FILTER', 'Cell_types', 'Bc', 'Cc', 'VAF', 'MCF','MultiAllelic_filter']] =  input_df.apply(lambda x: MultiAllelic_filtering(x['REF'], x['ALT'], x['FILTER'], x['Cell_types'],x['Dp'], x['Nc'], x['Bc'], x['Cc'], x['VAF'], x['MCF'], x['Cancer'], x['Non-Cancer']), axis=1, result_type="expand") 
-	input_df = input_df[input_df['MultiAllelic_filter']=='KEEP']
-	input_df = input_df[output_column_names + ['INDEX']]
+	input_df[['ALT', 'FILTER', 'Cell_types', 'Bc', 'Cc', 'VAF', 'MCF','STEP3FILTER']] =  input_df.apply(lambda x: MultiAllelic_filtering(x['REF'], x['ALT'], x['FILTER'], x['Cell_types'],x['Dp'], x['Nc'], x['Bc'], x['Cc'], x['VAF'], x['MCF'], x['Cancer'], x['Non-Cancer']), axis=1, result_type="expand") 
+	#input_df = input_df[input_df['MultiAllelic_filter']=='KEEP']
 
-	#Special case for chrM due to contaminants:
+	input_df['INDEX'] = input_df['#CHROM'].astype(str) + ':' + input_df['Start'].astype(str) + ':' + input_df['ALT'].str.split(',', n=1, expand=True)[0]
 
+	#Special case for chrM due to contaminants:
 	# Save chrM candidate SNVs to apply specific filters
 	chrm_df = input_df[input_df['#CHROM']=='chrM'].copy()
 	input_df = input_df[input_df['#CHROM']!='chrM']
@@ -55,27 +53,17 @@ def variant_calling_step3(file,out_prefix,deltaVAF,deltaMCF,chrM_conta,min_ac_re
 	chrm_df = chrm_df[~chrm_df['FILTER'].str.contains('Min|LR|gnomAD|LC|RNA', regex=True)]
 	if len(chrm_df)>0:
 		# Applying deltaVAF and deltaMCF filters
-		chrm_df['FILTER'] = chrm_df.apply(lambda x: 
-			chrM_filtering(x['Cell_types'],x['Dp'],x['VAF'], x['MCF'],deltaVAF,deltaMCF), axis=1)
-
+		chrm_df['STEP3FILTER'] = chrm_df.apply(lambda x: 
+			chrM_filtering(x['STEP3FILTER'],x['Cell_types'],x['Dp'],x['VAF'], x['MCF'],deltaVAF,deltaMCF), axis=1)
 
 	# Filter 1: Non-cancer coverage filter
 	input_df = input_df[~input_df['FILTER'].str.contains('Min_cell_types')]
-	
-	# Filter 2: Alt reads and cells in cancer filter
-	input_df['Bc'] = [i.split(',')[1] if ',' in i else i for i in input_df['Bc']] #only keeping cancer info
-	input_df['Cc'] = [i.split(',')[1] if ',' in i else i for i in input_df['Cc']] #only keeping cancer info
-	input_df = input_df.astype({'Bc':'int','Cc':'int'})
-	input_df = input_df[input_df['Bc']>=min_ac_reads]
-	input_df = input_df[input_df['Cc']>=min_ac_cells]
-
-	# Filter 3: Betabinomial filter in cancer cells
-	input_df = input_df[~input_df['Cell_type_Filter'].str.contains(',Non-Significant|,Low-Significant', regex = True)]
-	input_df = input_df[~input_df['Cell_type_Filter'].isin(['Non-Significant','Low-Significant'])]
-	
-	#Adding chrM SNVs detected above:
-	unfiltered_df = pd.concat([input_df,chrm_df])
-	unfiltered_df.to_csv(out_prefix+ '.calling.step3.unfiltered.tsv', sep='\t', index=False,  mode='a')
+
+	#Filter 2: Min. alt reads and cells in cancer
+	input_df['STEP3FILTER'] = input_df.apply(lambda x: BC_CC_filtering(x['STEP3FILTER'],x['ALT'],x['Cancer'],min_ac_reads,min_ac_cells), axis=1)
+
+	# Filter 3 and 8: Betabinomial filtering in cancer cells (Filter 3) and non-cancer cells (Filter 8)
+	input_df['STEP3FILTER'] = input_df.apply(lambda x: BetaBino_filtering(x['STEP3FILTER'],x['Cell_types'],x['Cell_type_Filter'],x['Start']), axis=1)
 
 	# Filter 4: Noise filter:
 	input_df = input_df[~input_df['FILTER'].str.contains('Noisy_site')] #multi allelic sites are filtered above
@@ -90,26 +78,27 @@ def variant_calling_step3(file,out_prefix,deltaVAF,deltaMCF,chrM_conta,min_ac_re
 	input_df = input_df[~input_df['FILTER'].str.contains('PoN', regex = True)]
 	
 	# Filter 8: Betabinomial filter in non-cancer cells
-	input_df = input_df[~input_df['Cell_type_Filter'].str.contains('Low-Significant,|PASS,', regex = True)]
-	input_df = input_df[~input_df['FILTER'].str.contains('Cell_type_noise|Multiple_cell_types|Noisy_site', regex = True)]
+	input_df = input_df[~input_df['FILTER'].str.contains('Cell_type_noise', regex = True)]
 
 	# Filter 9: gnomAD filter
 	input_df = input_df[~input_df['FILTER'].str.contains('gnomAD', regex = True)]
 
-	# Filter 10: Distance filter
-	input_df['FILTER'] = tag_clustered_SNVs(input_df, clust_dist)
-	input_df = input_df[~input_df['FILTER'].str.contains('dist', regex = True)]
-	
 	#Adding chrM SNVs detected above:
 	input_df = pd.concat([input_df,chrm_df])
 
+	# Filter 10: Distance filter
+	input_df['STEP3FILTER'] = tag_clustered_SNVs(input_df, clust_dist)
+	input_df.to_csv(out_prefix+ '.calling.step3.unfiltered.tsv', sep='\t', index=False,  mode='a')
+	input_df = input_df[~input_df['STEP3FILTER'].str.contains('dist', regex = True)]
+
 	#Filtering PASS SNVs (only left in chrM)
-	filtered_df = input_df[input_df['FILTER'] =='PASS']
+	filtered_df = input_df[input_df['STEP3FILTER'] =='PASS']
 
 	# Write output
 	filtered_df.to_csv(out_prefix+ '.calling.step3.tsv', sep='\t', index=False,  mode='a')
+
 	
-def chrM_filtering(CTYPES,DP,VAF,MCF,deltaVAFmin,deltaMCFmin):
+def chrM_filtering(STEP3FILTER,CTYPES,DP,VAF,MCF,deltaVAFmin,deltaMCFmin):
 	ctypes = CTYPES.split(',')
 	if len(ctypes)>1:
 		if ctypes[0]=='Cancer':
@@ -121,7 +110,10 @@ def chrM_filtering(CTYPES,DP,VAF,MCF,deltaVAFmin,deltaMCFmin):
 		DP1,DP2=DP.split(',')
 		#Only look at higher depth loci
 		if int(DP1)<100 or int(DP2)<100: 
-			return 'LowDepth'
+			if STEP3FILTER == 'PASS':
+				return 'LowDepth'
+			else:
+				return STEP3FILTER+',LowDepth'
 		else: 
 			VAFs = VAF.split(',')
 			MCFs = MCF.split(',')
@@ -135,24 +127,38 @@ def chrM_filtering(CTYPES,DP,VAF,MCF,deltaVAFmin,deltaMCFmin):
 			
 			# mtSNVs are variants with high VAF in cancer and low VAF in non-cancer cells
 			if deltaVAF < deltaVAFmin:
-				return 'LowDeltaVAF'
+				if STEP3FILTER == 'PASS':
+					return 'LowDeltaVAF'
+				else:
+					return STEP3FILTER+',LowDeltaVAF'
+
 		
 			elif deltaMCF < deltaMCFmin:
-				return 'LowDeltaMCF'
+				if STEP3FILTER == 'PASS':
+					return 'LowDeltaMCF'
+				else:
+					return STEP3FILTER+',LowDeltaMCF'
 			else:
-				return 'PASS'
+				return STEP3FILTER
 
 	elif len(ctypes)==1:
-		if ctypes[0]!="Cancer":
-			return 'Non-Cancer'
-		elif int(DP)<100:
-			return 'LowDepth'
+		if int(DP)<100:
+			if STEP3FILTER == 'PASS':
+				return 'LowDepth'
+			else:
+				return STEP3FILTER+',LowDepth'
 		elif float(VAF)<0.05:
-			return 'LowVAF'
+			if STEP3FILTER == 'PASS':
+				return 'LowVAF'
+			else:
+				return STEP3FILTER+',LowVAF'
 		elif float(MCF)<0.05:
-			return 'LowMCF'
+			if STEP3FILTER == 'PASS':
+				return 'LowMCF'
+			else:
+				return STEP3FILTER+',LowMCF'
 		else:
-			return 'PASS'
+			return STEP3FILTER
 
 def MultiAllelic_filtering(REF, ALT, FILTER, CTYPES, DP, NC, BC, CC, VAF, MCF, CancerInfo, NonCancerInfo):
 	ref_dict = {'A':0, 'C':1, 'T':2, 'G':3}
@@ -174,8 +180,6 @@ def MultiAllelic_filtering(REF, ALT, FILTER, CTYPES, DP, NC, BC, CC, VAF, MCF, C
 			index = np.argmax(BCS)
 			BCS[index] = 0 # removing max to select next "max"
 			MAX2 = max(BCS)
-			if not(MAX2/MAX<0.05): # one alt 20x larger than the other)
-				return ALT, FILTER, CTYPES, BC, CC, VAF, MCF,'DELETE'
 
 			ALT_Cancer = 'ACTG'[index]
 			BC_Cancer = int(CancerInfo.split('|')[3].split(':')[index])
@@ -195,42 +199,89 @@ def MultiAllelic_filtering(REF, ALT, FILTER, CTYPES, DP, NC, BC, CC, VAF, MCF, C
 			VAF = ','.join([str(VAF_NonCancer),str(VAF_Cancer)])
 			MCF = ','.join([str(MCF_NonCancer),str(MCF_Cancer)])
 
+			if not(MAX2/MAX<0.05): # one alt 20x larger than the other)
+				return ALT, FILTER, CTYPES, BC, CC, VAF, MCF,'Multi-Allelic'
+
+			return ALT, FILTER, CTYPES, BC, CC, VAF, MCF,'PASS'
+		
+		elif len(ctypes)==1:
+			BCS = CancerInfo.split('|')[3].split(':')[:4]
+			BCS = [int(i) for i in BCS]
+			BCS[i_ref] = 0 # setting REF to 0 as we only consider ALT
+			MAX = max(BCS)
+			index = np.argmax(BCS)
+			BCS[index] = 0 # removing max to select next "max"
+			MAX2 = max(BCS)
+
+			ALT = 'ACTG'[index]
+			BC = int(CancerInfo.split('|')[3].split(':')[index])
+			CC = int(CancerInfo.split('|')[2].split(':')[index])
+			VAF = round(BC/int(DP),4)
+			MCF= round(CC/int(NC),4)
+
 			FILTER = FILTER.replace('Multi-allelic,','')
 			FILTER = FILTER.replace(',Multi-allelic','')
 			FILTER = FILTER.replace('Multi-allelic','')
 
-			return ALT, FILTER, CTYPES, BC, CC, VAF, MCF,'KEEP'
-		
-		elif len(ctypes)==1:
-			if ctypes[0]=='Cancer':
-				BCS = CancerInfo.split('|')[3].split(':')[:4]
-				BCS = [int(i) for i in BCS]
-				BCS[i_ref] = 0 # setting REF to 0 as we only consider ALT
-				MAX = max(BCS)
-				index = np.argmax(BCS)
-				BCS[index] = 0 # removing max to select next "max"
-				MAX2 = max(BCS)
-				if not(MAX2/MAX<0.05): # one alt 20x larger than the other)
-					return ALT, FILTER, CTYPES, BC, CC, VAF, MCF,'DELETE'
-
-				ALT = 'ACTG'[index]
-				BC = int(CancerInfo.split('|')[3].split(':')[index])
-				CC = int(CancerInfo.split('|')[2].split(':')[index])
-				VAF = round(BC/int(DP),4)
-				MCF= round(CC/int(NC),4)
-
-				FILTER = FILTER.replace('Multi-allelic,','')
-				FILTER = FILTER.replace(',Multi-allelic','')
-				FILTER = FILTER.replace('Multi-allelic','')
-
-				return ALT, FILTER, CTYPES, BC, CC, VAF, MCF,'KEEP'
-			else:
-				return ALT, FILTER, CTYPES, BC, CC, VAF, MCF,'DELETE'
+			if not(MAX2/MAX<0.05): # one alt 20x larger than the other)
+				return ALT, FILTER, CTYPES, BC, CC, VAF, MCF,'Multi-Allelic'
+
+			return ALT, FILTER, CTYPES, BC, CC, VAF, MCF,'PASS'
 	else:	
-		return ALT, FILTER, CTYPES, BC, CC, VAF, MCF,'KEEP'
+		return ALT, FILTER, CTYPES, BC, CC, VAF, MCF,'PASS'
 	
+def BC_CC_filtering(STEP3FILTER,ALT,CancerInfo,min_ac_reads,min_ac_cells):
+	alt_dict = {'A':0, 'C':1, 'T':2, 'G':3}
+	i_alt = alt_dict[ALT[0]]
+	try:
+		CancerInfos = CancerInfo.split('|')
+		BC = CancerInfos[3].split(':')[i_alt]
+		CC = CancerInfos[2].split(':')[i_alt]
+		if int(BC)<min_ac_reads or int(CC)<min_ac_cells:
+			if STEP3FILTER == 'PASS':
+				return 'LowDepth'
+			else:
+				return STEP3FILTER + ',LowDepth'
+		else:
+			return STEP3FILTER
+	except AttributeError:
+		if STEP3FILTER == 'PASS':
+				return 'NoCov'
+		else:
+			return STEP3FILTER + ',NoCov'
+
+
+def BetaBino_filtering(STEP3FILTER,CTYPES,CTYPESFILTER,POS):
+	ctypes = CTYPES.split(',')
+	if len(ctypes) == 1:
+		if CTYPESFILTER in ['Non-Significant','Low-Significance']:
+			if STEP3FILTER == 'PASS':
+				return 'CancerNonSig'
+			else:
+				return STEP3FILTER + ',CancerNonSig'
+		return STEP3FILTER
+	elif len(ctypes) > 1:
+		if ctypes[0] == 'Cancer':
+			i_cancer = 0
+			i_noncancer = 1
+		elif ctypes[1] == 'Cancer':
+			i_cancer = 1
+			i_noncancer = 0
+		if CTYPESFILTER.split(',')[i_cancer] in ['Non-Significant','Low-Significance']:
+			if STEP3FILTER == 'PASS':
+				return 'CancerNonSig'
+			else:
+				return STEP3FILTER + ',CancerNonSig'
+		if CTYPESFILTER.split(',')[i_noncancer] in ['PASS','Low-Significance']:
+			if STEP3FILTER == 'PASS':
+				return 'NonCancerSig'
+			else:
+				return STEP3FILTER + ',NonCancerSig'
+		return STEP3FILTER
+
+
 def tag_clustered_SNVs(df, clust_dist):
-	df2 = df[df['FILTER']=='PASS']
+	df2 = df[df['STEP3FILTER']=='PASS']
 	idx = df2['INDEX']
 	a=[]
 	for i in idx:
@@ -249,13 +300,13 @@ def tag_clustered_SNVs(df, clust_dist):
 				trash.append(':'.join([chr2,pos2,base2]))
 	trash = set(trash)
 	updated_filter= df.apply(lambda x : 
-							   modify_filter(x['INDEX'],x['FILTER'], clust_dist, trash),
+							   modify_filter(x['INDEX'],x['STEP3FILTER'], clust_dist, trash),
 							   axis=1)
 	print(updated_filter)
 	return updated_filter
 
 def modify_filter(INDEX, FILTER, clust_dist, trash):
-	clustered = 'Clust_dist{}'.format(str(clust_dist))
+	clustered = 'Clust_dist_{}'.format(str(clust_dist))
 	if INDEX in trash:
 		if FILTER == 'PASS':
 			return clustered