From 3fea42724d8032844954f61ead8fd88625ee83e0 Mon Sep 17 00:00:00 2001
From: Nicolas HOMBERG <nicolashomberg@gricad-nh.u-ga.fr>
Date: Fri, 29 Nov 2024 15:38:32 +0100
Subject: [PATCH] ready to upload

---
 phase-1_2_3/automated_docker_test.sh          |   4 +-
 phase-1_2_3/bundle/FAQ.md                     |  10 +-
 phase-1_2_3/bundle/submission.md              |   6 +-
 phase-1_2_3/generate_bundle.sh                |  23 +-
 phase-1_2_3/generate_data.sh                  |   2 +
 .../ingestion_program/sub_ingestion.py        |   4 +-
 .../scoring_program/detailed_results.Rmd      |   2 +-
 phase-1_2_3/scoring_program/scoring.R         |   7 +-
 .../submission_script_installpkgcran.R        | 230 ----------------
 .../submission_script_nnlsmultimodalSource.R  |  17 +-
 .../starting_kit/submissions/program.R        |  13 +
 .../starting_kit/submissions/program.py       |  42 ---
 .../starting_kit_phase1/submission_script.R   | 246 -----------------
 .../starting_kit_phase1/submission_script.py  | 252 ------------------
 14 files changed, 52 insertions(+), 806 deletions(-)
 delete mode 100644 phase-1_2_3/starting_kit/submission_script_installpkgcran.R
 create mode 100644 phase-1_2_3/starting_kit/submissions/program.R
 delete mode 100644 phase-1_2_3/starting_kit/submissions/program.py
 delete mode 100644 phase-1_2_3/starting_kit_phase1/submission_script.R
 delete mode 100644 phase-1_2_3/starting_kit_phase1/submission_script.py

diff --git a/phase-1_2_3/automated_docker_test.sh b/phase-1_2_3/automated_docker_test.sh
index 52f1594..311b5d0 100644
--- a/phase-1_2_3/automated_docker_test.sh
+++ b/phase-1_2_3/automated_docker_test.sh
@@ -37,8 +37,8 @@ echo "Create submission program"
 cd starting_kit/
 rm -rf submissions
 # Rscript submission_script.R >> logs
-# Rscript submission_script.R 
-python submission_script.py
+Rscript submission_script.R 
+# python submission_script.py
 cd - 
 echo "Done"
 
diff --git a/phase-1_2_3/bundle/FAQ.md b/phase-1_2_3/bundle/FAQ.md
index b2f7724..0f7a383 100644
--- a/phase-1_2_3/bundle/FAQ.md
+++ b/phase-1_2_3/bundle/FAQ.md
@@ -16,20 +16,20 @@ If your submission fails on Codabench, don't panic, review the logs! You can acc
 
 Try interactive docker or conda environnement, it is especially useful for python user!
 
-A conda environment is provided follow this steps to install and activate it. Retrieve the file [environment-r.yml](https://github.com/bcm-uga/hadaca3/blob/main/docker/codabench_hadaca3_pyr/environment/environment-r.yml) from github as follow:
+A conda environment is provided; follow these steps to install and activate it. Retrieve the file [env_final_pyr.yml](https://github.com/bcm-uga/hadaca3/blob/main/docker/codabench_hadaca_final/environment/env_final_pyr.yml) from GitHub as follows:
 
 ``` 
-wget https://raw.githubusercontent.com/bcm-uga/hadaca3/main/docker/codabench_hadaca3_pyr/environment/environment-r.yml
-conda env create -f environment-r.yml && conda activate h3
+wget https://raw.githubusercontent.com/bcm-uga/hadaca3/refs/heads/main/docker/codabench_hadaca_final/environment/env_final_pyr.yml
+conda env create -f env_final_pyr.yml && conda activate h3_final
 ```
 
 To run the docker interactively you can run with: 
 
 ```
 ## for the R version
-sudo docker run -it -v .:/hadaca3 -w /hadaca3 hombergn/hadaca3_pyr R`  and then `source("submission_script.R") 
+sudo docker run -it -v .:/hadaca3 -w /hadaca3 hombergn/hadaca3_final R   # then, at the R prompt: source("submission_script.R")
 ## for the python version
-sudo docker run -it -v .:/hadaca3 -w /hadaca3 hombergn/hadaca3_pyr python` and then `import submission_script
+sudo docker run -it -v .:/hadaca3 -w /hadaca3 hombergn/hadaca3_final python   # then, at the Python prompt: import submission_script
 ```
 
 Due to the usage of the super user (sudo) all files created by docker will be owned by root. In order to retrieve ownership, you can use this command:
diff --git a/phase-1_2_3/bundle/submission.md b/phase-1_2_3/bundle/submission.md
index 512aeb2..335ff52 100755
--- a/phase-1_2_3/bundle/submission.md
+++ b/phase-1_2_3/bundle/submission.md
@@ -73,8 +73,8 @@ Then follow steps 4 and 5 described above.
 
 ```
 cd starting_kit
-sudo docker pull hombergn/hadaca3_pyr
-sudo docker run -it -v .:/hadaca3 -w /hadaca3 hombergn/hadaca3_pyr R
+sudo docker pull hombergn/hadaca3_final
+sudo docker run -it -v .:/hadaca3 -w /hadaca3 hombergn/hadaca3_final R
 source("submission_script.R")
 chown -R $USER submissions #if necessary
 ```
@@ -82,7 +82,7 @@ chown -R $USER submissions #if necessary
 Alternatively, run the following commande to execute the `submission_script`:
 
 ```
-sudo docker run -v .:/hadaca3 -w /hadaca3  hombergn/hadaca3_pyr Rscript submission_script.R
+sudo docker run -v .:/hadaca3 -w /hadaca3  hombergn/hadaca3_final Rscript submission_script.R
 ```
 
 and regain ownership of the files generated with:
diff --git a/phase-1_2_3/generate_bundle.sh b/phase-1_2_3/generate_bundle.sh
index 4169e1b..de47935 100644
--- a/phase-1_2_3/generate_bundle.sh
+++ b/phase-1_2_3/generate_bundle.sh
@@ -30,7 +30,7 @@ mkdir starting_kit
 #  generate baselines :
 rm -rf ~/projects/hadaca3/templates/tmp/
 Rscript ~/projects/hadaca3/templates/generate_baselines.R Phase_1  
-cp ~/projects/hadaca3/templates/tmp/* starting_kit/
+cp -R ~/projects/hadaca3/templates/tmp/* starting_kit/
 
 
 # rm -rf starting_kit_phase1
@@ -38,14 +38,15 @@ mkdir starting_kit_phase1
 
 rm -rf ~/projects/hadaca3/templates/tmp/
 Rscript ~/projects/hadaca3/templates/generate_baselines.R Phase_1_only  
-cp ~/projects/hadaca3/templates/tmp/* starting_kit_phase1/
+cp -R ~/projects/hadaca3/templates/tmp/* starting_kit_phase1/
 
 rm -r ~/projects/hadaca3/templates/tmp/ 
 
 
 #### Put input data inside the bundle ! 
-cd starting_kit/ ; zip  -FS  -r  ../bundle/starting_kit_phase2-3.zip *  -x \*submissions\* ; cd .. ; 
-cd starting_kit_phase1/ ; zip  -FS  -r  ../bundle/starting_kit_phase1.zip *  -x \*submissions\* ; cd .. ; 
+# cd starting_kit/ ; zip  -FS  -r  ../bundle/starting_kit_phase2-3.zip *  -x \*submissions\* ; cd .. ; 
+# cd starting_kit_phase1/ ; zip  -FS  -r  ../bundle/starting_kit_phase1.zip *  -x \*submissions\* ; cd .. ; 
+
 zip -FS -r -j bundle/input_data_phase2.zip input_data/
 zip -FS -r -j bundle/input_data_phase3.zip input_data_final/
 zip -FS -r -j bundle/input_data_phase1.zip input_data_phase1/
@@ -67,15 +68,15 @@ zip -FS -j -r  bundle/ground_truth_phase1.zip ground_truth_phase1/
 
 
 # ##### generate starting_kit and input_data outisde bundle.zip
-# cd starting_kit_phase1/ ; zip  -FS  -r  ../bundle/starting_kit_phase1.zip *  -x \*submissions\* -x \*data\* ; cd .. ; 
-# cd starting_kit/ ; zip  -FS  -r  ../bundle/starting_kit_phase2-3.zip *  -x \*submissions\* -x \*data\* ; cd .. ; 
+cd starting_kit_phase1/ ; zip  -FS  -r  ../bundle/starting_kit_phase1.zip *  -x \*submissions\* -x \*data\* ; cd .. ; 
+cd starting_kit/ ; zip  -FS  -r  ../bundle/starting_kit_phase2-3.zip *  -x \*submissions\* -x \*data\* ; cd .. ; 
 
-# zip -FS -r -j input_data_phase2.zip input_data/
-# zip -FS -r -j input_data_phase3.zip input_data_final/
-# zip -FS -r -j input_data_phase1.zip input_data_phase1/
+zip -FS -r -j input_data_phase2.zip input_data/
+zip -FS -r -j input_data_phase3.zip input_data_final/
+zip -FS -r -j input_data_phase1.zip input_data_phase1/
 
-# cd starting_kit/ ; zip  -FS  -r  ../starting_kit_phase2-3.zip *  -x \*submissions\* ; cd .. ; 
-# cd starting_kit_phase1/ ; zip  -FS  -r  ../starting_kit_phase1.zip *  -x \*submissions\* ; cd .. ; 
+cd starting_kit/ ; zip  -FS  -r  ../starting_kit_phase2-3.zip *  -x \*submissions\* ; cd .. ; 
+cd starting_kit_phase1/ ; zip  -FS  -r  ../starting_kit_phase1.zip *  -x \*submissions\* ; cd .. ; 
 
 
 
diff --git a/phase-1_2_3/generate_data.sh b/phase-1_2_3/generate_data.sh
index fa06028..e8fb28a 100644
--- a/phase-1_2_3/generate_data.sh
+++ b/phase-1_2_3/generate_data.sh
@@ -98,6 +98,8 @@ do
 done
 
 
+# rm -rf starting_kit
+# rm -rf starting_kit_phase1
 
 
 mkdir starting_kit/data/
diff --git a/phase-1_2_3/ingestion_program/sub_ingestion.py b/phase-1_2_3/ingestion_program/sub_ingestion.py
index 5dbd350..2113f31 100644
--- a/phase-1_2_3/ingestion_program/sub_ingestion.py
+++ b/phase-1_2_3/ingestion_program/sub_ingestion.py
@@ -21,11 +21,9 @@
 try:
     # Define the target and link name
     target = "../ingested_program/attachement/"
-    link_name = "attachement/"
+    link_name = "attachement"
     
-    # Create a symbolic link
     os.symlink(target, link_name)
-    # print(f"Symbolic link created: {link_name} -> {target}")
 except FileExistsError:
     # Handle the case where the symbolic link already exists
     os.unlink(link_name)  # Remove the existing symbolic link
diff --git a/phase-1_2_3/scoring_program/detailed_results.Rmd b/phase-1_2_3/scoring_program/detailed_results.Rmd
index d79c2c3..6a53b00 100644
--- a/phase-1_2_3/scoring_program/detailed_results.Rmd
+++ b/phase-1_2_3/scoring_program/detailed_results.Rmd
@@ -1,6 +1,6 @@
 ---
 title: "Visualize Results"
-author: "Elise Amblard, Hugo Barbot, Florent Chuffart and Magali Richard"
+# author: "Elise Amblard, Hugo Barbot, Florent Chuffart and Magali Richard"
 date: "`r Sys.Date()`"
 output: 
   # prettydoc::html_pretty: # create a styles.css file and snakemake doesn't like it
diff --git a/phase-1_2_3/scoring_program/scoring.R b/phase-1_2_3/scoring_program/scoring.R
index eed7838..4504248 100644
--- a/phase-1_2_3/scoring_program/scoring.R
+++ b/phase-1_2_3/scoring_program/scoring.R
@@ -226,7 +226,7 @@ scoring_function <- function(A_real, A_pred) {
   else if (nrow(A_pred) > nrow(A_real) & setequal(rownames(A_real), c("basal",'classic'))) { # partial ground truth only for the in vivo dataset
     rmse = NA
     mae = NA
-    aitchison = eval_Aitchison(A_real, A_pred[rownames(A_real),])
+    aitchison = NA
     pearson_tot = NA
     pearson_col = NA
     pearson_row = correlationP_row(A_real, A_pred[rownames(A_real),])
@@ -277,6 +277,11 @@ scoring_function <- function(A_real, A_pred) {
   }
   judge_candidate_norm = apply(judge_candidate, 2, CenterScaleNorm)
 
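+  # Transform scores so that 1 is the best score: invert every column (1 - x), then invert the pearson/spearman columns a second time, which restores their original values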
+  judge_candidate_norm = 1 - judge_candidate_norm
+  judge_candidate_norm[,grep("pearson",colnames(judge_candidate_norm))] = 1 - judge_candidate_norm[,grep("pearson",colnames(judge_candidate_norm))]
+  judge_candidate_norm[,grep("spearman",colnames(judge_candidate_norm))] = 1 - judge_candidate_norm[,grep("spearman",colnames(judge_candidate_norm))]
+  
   # Average over judges with the geometric mean for the candidate of interest
   #score_aggreg = exp(mean(log(judge_candidate_norm[1,]),na.rm=T))
   weights = c(1/3*1/2,1/3*1/2,
diff --git a/phase-1_2_3/starting_kit/submission_script_installpkgcran.R b/phase-1_2_3/starting_kit/submission_script_installpkgcran.R
deleted file mode 100644
index 20e5172..0000000
--- a/phase-1_2_3/starting_kit/submission_script_installpkgcran.R
+++ /dev/null
@@ -1,230 +0,0 @@
-##################################################################################################
-### PLEASE only edit the program function between YOUR CODE BEGINS/ENDS HERE                   ###
-##################################################################################################
-
-#' The function to estimate the A matrix
-#' In the provided example, we use basic non-negative least squares (package "nnls"), which consists of minimizing the error term $||Mix - Ref \times Prop||^2$ with only positive entries in the prop matrix.
-#'
-#' @param mix a matrix of bulks (columns) and features (rows)
-#' @param ref a matrix pure types (columns) and features (rows)
-#' @param ... other parameters that will be ignored
-#' 
-#' @return the estimated A matrix
-#' 
-program = function(mix=NULL, ref=NULL, ...) {
-
-  ##
-  ## YOUR CODE BEGINS HERE
-  ##
-
-  install.packages("beeswarm")
-
-  # Creation of an index, idx_feat, corresponding to the intersection of features present in the references and those present in the mixtures.
-  idx_feat = intersect(rownames(mix), rownames(ref))
-  
-  # Estimation of proportions
-  prop = apply(mix[idx_feat,], 2, function(b, A) {
-    tmp_prop = lm(b ~ A - 1)$coefficients  # Using `-1` to remove the intercept
-    # tmp_prop = nnls::nnls(b=b,A=A)$x  
-    tmp_prop = tmp_prop / sum(tmp_prop)    # Sum To One
-    return(tmp_prop)
-  }, A=ref[idx_feat,])
-
-  # Labeling of estimated proportions 
-  rownames(prop) = colnames(ref)
-  return(prop)
-  
-  ##
-  ## YOUR CODE ENDS HERE nnls()
-  ##
-}
-
-install.packages = function (pkgs, repos="https://cloud.r-project.org", ...) {
-  installed_packages <- installed.packages( )
-  for (package in pkgs ) {
-    if ( !{ package %in% installed_packages } ) {
-     print(x = paste("Installation of ", package, sep = "") )
-      utils::install.packages(
-        pkgs = package,
-        repos = repos,
-        ...
-      )
-    } else {
-      print(x = paste(package, " is installed.", sep = "") )
-    }
-  }
-}
-
-
-
-##############################################################
-### Generate a prediction file /!\ DO NOT CHANGE THIS PART ###
-##############################################################
-
-
-
-mixes_data = readRDS("mixes_smoothies_fruits.rds")
-reference_data = readRDS("reference_fruits.rds")
-
-# we use the previously defined function 'program' to estimate A :
-pred_prop <- program(
-  mix = mixes_data ,
-  ref = reference_data
-)
-
-
-
-##############################################################
-### Validate the prediction /!\ DO NOT CHANGE THIS PART ###
-##############################################################
-
-validate_pred <- function(pred, nb_samples , nb_cells,col_names ){
-
-  error_status = 0   # 0 means no errors, 1 means "Fatal errors" and 2 means "Warning"
-  error_informations = ''
-
-  ## Ensure that all sum ofcells proportion approximately equal 1
-  if (!all(sapply(colSums(pred), function(x) isTRUE(all.equal(x, 1) )))) {
-    msg = "The prediction matrix does not respect the laws of proportions: the sum of each columns should be equal to 1\n"
-    error_informations = paste(error_informations,msg)
-    error_status = 2
-  }
-
-  ##Ensure that the prediction have the correct names ! 
-  if(! setequal(rownames(pred),col_names) ){
-    msg = paste0(    "The row names in the prediction matrix should match: ", toString(col_names),"\n")
-    error_informations = paste(error_informations,msg)
-    error_status = 2
-  }
-
-  ## Ensure that the prediction return the correct number of samples and  number of cells. 
-  if (nrow(pred) != nb_cells  | ncol(pred) != nb_samples)  {
-    msg= paste0('The prediction matrix has the dimention: ',toString(dim(pred))," whereas the dimention: ",toString(c(nb_cells,nb_samples))," is expected\n"   )
-    error_informations = paste(error_informations,msg)
-    error_status = 1
-  }
-
-  if(error_status == 1){
-    # The error is blocking and should therefor stop the execution. 
-    stop(error_informations)
-  }
-  if(error_status == 2){
-    print("Warning: ")
-    warning(error_informations)
-  }  
-}
-
-
-
-validate_pred <- function(pred, nb_samples = ncol(mixes_data) , nb_cells= ncol(reference_data),col_names = colnames(reference_data) )
-
-
-###############################
-### Code submission mode
-
-print("")
-for (package in c("zip") ) {
-  if ( !{ package %in% installed.packages( ) } ) {
-        print(x = paste("Installation of ", package, sep = "") )
-        install.packages(
-            pkgs = "zip"
-          , repos = "https://cloud.r-project.org"
-        )
-    } 
-}
-
-
-# we generate a zip file with the 'program' source code
-print('')
-if ( !dir.exists(paths = "submissions") ) {
-    dir.create(path = "submissions")
-}
-
-# we save the source code as a R file named 'program.R' :
-dump(
-    list = c("program")
-    # list = new_functions
-  , file = paste0("submissions", .Platform$file.sep, "program.R")
-)
-
-date_suffix = format(x = Sys.time( ), format = "%Y_%m_%d_%H_%M_%S")
-
-zip_program <- paste0("submissions", .Platform$file.sep, "program_", date_suffix, ".zip")
-zip::zip(zipfile= zip_program
-  , files   = paste0("submissions", .Platform$file.sep, "program.R")
-  , mode = "cherry-pick")
-
-if(dir.exists("attachement")) {
-  zip::zip_append(
-      zipfile = zip_program
-      , files= paste0("attachement", .Platform$file.sep)
-    , mode = "cherry-pick"
-  )
-}
-
-zip::zip_list(zip_program)
-print(x = zip_program)
-
-
-
-
-# # we create the associated zip file :
-# zip_program <- paste0("submissions", .Platform$file.sep, "program_", date_suffix, ".zip")
-# zip::zip(zipfile= zip_program
-#                 , files= paste0("submissions", .Platform$file.sep, "program.R")
-#                 , mode = "cherry-pick"
-#                 )
-
-# zip::zip_list(zip_program)
-# print(x = zip_program)
-
-###############################
-### Result submission mode  
-
-#  Generate a zip file with the prediction
-if ( !dir.exists(paths = "submissions") ) {
-    dir.create(path = "submissions")
-}
-
-prediction_name = "prediction.rds"
-
-## we save the estimated A matrix as a rds file named 'results.rds' :
-saveRDS(
-object = pred_prop
-, file   = paste0("submissions", .Platform$file.sep, prediction_name)
-) 
-
-# write_rds(pred_prop, file = "prediction_hugo.rds")
-
-## we create the associated zip file :
-zip_results <- paste0("submissions", .Platform$file.sep, "results_", date_suffix, ".zip")
-zip::zipr(
-         zipfile = zip_results
-       , files   = paste0("submissions", .Platform$file.sep, c(prediction_name) )
-     )
-print(x = zip_results)
-
-sessionInfo( )
-
-###############################################################
-### How to submit the zip file? /!\ DO NOT CHANGE THIS PART ###
-###############################################################
-#
-# The code above generates the files *`r zip_program`*  and *`r zip_results`*  (the 1st one for code submission, the 2nd one for result submission).
-#
-# Submit the zip submission file on the challenge in the `My Submission` tab, fill the metadata, select the task you want to submit to and upload your submission files
-#
-# On the codalab challenge web page, The *STATUS* become :
-#   - Submitting
-#   - Submitted
-#   - Running
-#   - Finished
-#
-# When it’s finished :
-#   - You refresh the page and click on the green button 'add to leaderboard' to see your score
-#   - If enable, details for report could be downloaded by clicking on your submission
-#   - Some execution logs are available to check and or download.
-#   - Metadata are editable when you click on your submission
-#   - Leader board is updated in the `Results` tab.
-#
-
diff --git a/phase-1_2_3/starting_kit/submission_script_nnlsmultimodalSource.R b/phase-1_2_3/starting_kit/submission_script_nnlsmultimodalSource.R
index 6f44ca5..9c5d86c 100644
--- a/phase-1_2_3/starting_kit/submission_script_nnlsmultimodalSource.R
+++ b/phase-1_2_3/starting_kit/submission_script_nnlsmultimodalSource.R
@@ -21,7 +21,7 @@ program <- function(mix_rna=NULL, ref_bulkRNA=NULL,
   ## YOUR CODE BEGINS HERE
   ##
   
-  source("attachement/link_gene_CpG.R")
+  source("attachement/Source_prior_known_features.R")
   
   
   
@@ -32,6 +32,10 @@ program <- function(mix_rna=NULL, ref_bulkRNA=NULL,
     mix_rna = mix_rna[idx_feat,]
     ref_bulkRNA = ref_bulkRNA[idx_feat,]
     
+    mix_rna = mix_rna[random_choosen_features$random_choosen_genes,]
+    ref_bulkRNA = ref_bulkRNA[random_choosen_features$random_choosen_genes,]
+    
+    
     prop_rna = apply(mix_rna, 2, function(b, A) {
       tmp_prop = nnls::nnls(b=b, A=A)$x
       tmp_prop = tmp_prop / sum(tmp_prop) # Sum To One
@@ -51,15 +55,8 @@ program <- function(mix_rna=NULL, ref_bulkRNA=NULL,
     mix_met = mix_met[idx_feat,]
     ref_met = ref_met[idx_feat,]
     
-    
-    
-    # select gene present mix_rna if present
-    if ( !( is.null(x = mix_rna) ) ) {
-      probes_feature = probes_feature[probes_feature$gene %in% rownames(mix_rna),]
-    }
-    
-    mix_met = mix_met[rownames(mix_met) %in% rownames(probes_feature),]
-    ref_met = ref_met[rownames(ref_met) %in% rownames(probes_feature),]
+    mix_met = mix_met[random_choosen_features$random_choosen_probes,]
+    ref_met = ref_met[random_choosen_features$random_choosen_probes,]
     
     
     prop_met = apply(mix_met, 2, function(b, A) {
diff --git a/phase-1_2_3/starting_kit/submissions/program.R b/phase-1_2_3/starting_kit/submissions/program.R
new file mode 100644
index 0000000..513588c
--- /dev/null
+++ b/phase-1_2_3/starting_kit/submissions/program.R
@@ -0,0 +1,13 @@
+program <-  # NOTE(review): looks like dump() output written by a submission script - confirm it is meant to be versioned
+function (mix = NULL, ref = NULL, ...)  # mix, ref: row-named 2-D inputs; arguments captured by `...` are never used
+{
+    idx_feat = intersect(rownames(mix), rownames(ref))  # row names present in both mix and ref
+    prop = apply(mix[idx_feat, ], 2, function(b, A) {  # one fit per column of mix, restricted to the shared rows
+        tmp_prop = lm(b ~ A - 1)$coefficients  # least-squares fit of b on A; `- 1` removes the intercept
+        tmp_prop[tmp_prop < 0] = 0  # clamp negative coefficients to zero
+        tmp_prop = tmp_prop/sum(tmp_prop)  # rescale so the coefficients sum to one; NOTE(review): 0/0 gives NaN if every coefficient was clamped
+        return(tmp_prop)
+    }, A = ref[idx_feat, ])  # A is ref restricted to the shared rows
+    rownames(prop) = colnames(ref)  # label the rows of prop with ref's column names
+    return(prop)
+}
diff --git a/phase-1_2_3/starting_kit/submissions/program.py b/phase-1_2_3/starting_kit/submissions/program.py
deleted file mode 100644
index 48b2f01..0000000
--- a/phase-1_2_3/starting_kit/submissions/program.py
+++ /dev/null
@@ -1,42 +0,0 @@
-def program(mix=None, ref=None, **kwargs):
-
-  ##
-  ## YOUR CODE BEGINS HERE
-  ##
-
-  required_packages = ["sklearn","pandas",'scipy']
-  install_and_import_packages(required_packages)
-  from sklearn.linear_model import LinearRegression
-
-  from attachement import additionnal_script
-  additionnal_script.useless_function()
-
-
-  def estimate_proportions(mix_df, ref_df):
-    results = []
-    for i in range(len(mix_df.columns)):
-        mix_col = mix_df.iloc[:, i]  # Select the i-th column as a Series
-        res = LinearRegression(fit_intercept=False).fit(ref_df, mix_col).coef_
-        # res, _ = scipy.optimize.nnls(ref_df.to_numpy(), mix_col.to_numpy())
-        res[res < 0] = 0
-        results.append(res)
-
-    # Normalize the results to get proportions
-    props = pandas.DataFrame([res_i / sum(res_i) for res_i in results], columns=ref_df.columns)
-    return props.T
-  
-
-  # Creation of an index, idx_feat, corresponding to the intersection of features present in the references and those present in the mixtures.
-  idx_feat = mix.index.intersection(ref.index)
-  mix_filtered = mix.loc[idx_feat, :]
-  ref_filtered = ref.loc[idx_feat, :]
-
-  prop = estimate_proportions(mix_filtered, ref_filtered)
- 
-  # Labeling of estimated proportions 
-  prop.columns = mix.columns
-
-  return prop
-  ##
-  ## YOUR CODE ENDS HERE
-  ##
diff --git a/phase-1_2_3/starting_kit_phase1/submission_script.R b/phase-1_2_3/starting_kit_phase1/submission_script.R
deleted file mode 100644
index cc9075d..0000000
--- a/phase-1_2_3/starting_kit_phase1/submission_script.R
+++ /dev/null
@@ -1,246 +0,0 @@
-##################################################################################################
-### PLEASE only edit the program function between YOUR CODE BEGINS/ENDS HERE                   ###
-##################################################################################################
-
-#' The function to estimate the A matrix
-#' In the provided example, we use basic non-negative least squares (package "nnls"), which consists of minimizing the error term $||Mix - Ref \times Prop||^2$ with only positive entries in the prop matrix.
-#'
-#' @param mix a matrix of bulks (columns) and features (rows)
-#' @param ref a matrix pure types (columns) and features (rows)
-#' @param ... other parameters that will be ignored
-#' 
-#' @return the estimated A matrix
-#' 
-program = function(mix=NULL, ref=NULL, ...) {
-
-  ##
-  ## YOUR CODE BEGINS HERE
-  ##
-
-  # Creation of an index, idx_feat, corresponding to the intersection of features present in the references and those present in the mixtures.
-  idx_feat = intersect(rownames(mix), rownames(ref))
-  
-  # Estimation of proportions
-  prop = apply(mix[idx_feat,], 2, function(b, A) {
-    tmp_prop = lm(b ~ A - 1)$coefficients  # Using `-1` to remove the intercept
-    # tmp_prop = nnls::nnls(b=b,A=A)$x  
-    tmp_prop[tmp_prop < 0] = 0
-    tmp_prop = tmp_prop / sum(tmp_prop)    # Sum To One
-    return(tmp_prop)
-  }, A=ref[idx_feat,])
-
-  # Labeling of estimated proportions 
-  rownames(prop) = colnames(ref)
-  return(prop)
-  
-  ##
-  ## YOUR CODE ENDS HERE
-  ##
-}
-
-
-##############################################################
-### Generate a prediction file /!\ DO NOT CHANGE THIS PART ###
-##############################################################
-
-validate_pred <- function(pred, nb_samples = ncol(mix_rna) , nb_cells= ncol(ref_rna),col_names = colnames(ref_met) ){
-
-  error_status = 0   # 0 means no errors, 1 means "Fatal errors" and 2 means "Warning"
-  error_informations = ''
-
-  ## Ensure that all sum ofcells proportion approximately equal 1
-  if (!all(sapply(colSums(pred), function(x) isTRUE(all.equal(x, 1) )))) {
-    msg = "The prediction matrix does not respect the laws of proportions: the sum of each columns should be equal to 1\n"
-    error_informations = paste(error_informations,msg)
-    error_status = 2
-  }
-
-  ##Ensure that the prediction have the correct names ! 
-  if(! setequal(rownames(pred),col_names) ){
-    msg = paste0(    "The row names in the prediction matrix should match: ", toString(col_names),"\n")
-    error_informations = paste(error_informations,msg)
-    error_status = 2
-  }
-
-  ## Ensure that the prediction return the correct number of samples and  number of cells. 
-  if (nrow(pred) != nb_cells  | ncol(pred) != nb_samples)  {
-    msg= paste0('The prediction matrix has the dimention: ',toString(dim(pred))," whereas the dimention: ",toString(c(nb_cells,nb_samples))," is expected\n"   )
-    error_informations = paste(error_informations,msg)
-    error_status = 1
-  }
-
-  if(error_status == 1){
-    # The error is blocking and should therefor stop the execution. 
-    # tryCatch(message("hello\n"), message=function(e){cat("goodbye\n")})  use this here ? 
-    stop(error_informations)
-  }
-  if(error_status == 2){
-    print("Warning: ")
-    warning(error_informations)
-  }  
-}
-
-dir_name = paste0("data",.Platform$file.sep)
-dataset_list = list.files(dir_name,pattern="mixes*")
-
-reference_data <- readRDS(file =  paste0(dir_name, "reference_pdac.rds"))
-
-
-predi_list = list()
-for (dataset_name in dataset_list){
-
-  print(paste0("generating prediction for dataset:",toString(dataset_name) ))
-
-  mixes_data <- readRDS(file = paste0(dir_name, dataset_name))
-
-  if ("mix_rna" %in% names(mixes_data)) {
-    mix_rna = mixes_data$mix_rna
-  } else {
-    mix_rna = mixes_data
-  }
-  if ("mix_met" %in% names(mixes_data)) {
-    mix_met = mixes_data$mix_met  
-  } else {
-    mix_met = NULL
-  }
-
-  if ("ref_bulkRNA" %in% names(reference_data)) {
-    ref_bulkRNA = reference_data$ref_bulkRNA
-  } else {
-    ref_bulkRNA = reference_data
-  }
-  if ("ref_met" %in% names(reference_data)) {
-    ref_met = reference_data$ref_met  
-  } else {
-    ref_met = NULL
-  }
-  if ("ref_scRNA" %in% names(reference_data)) {
-    ref_scRNA = reference_data$ref_scRNA  
-  } else {
-    ref_scRNA = NULL
-  }
-
-  # we use the previously defined function 'program' to estimate A :
-  pred_prop <- program(mix_rna, ref_bulkRNA, mix_met=mix_met, ref_met=ref_met, ref_scRNA=ref_scRNA)
-  validate_pred(pred_prop,nb_samples = ncol(mix_rna),nb_cells = ncol(ref_bulkRNA),col_names = colnames(ref_met))
-  predi_list[[dataset_name]] = pred_prop
-
-}
-
-
-##############################################################
-### Check the prediction /!\ DO NOT CHANGE THIS PART ###
-##############################################################
-
-
-###############################
-### Code submission mode
-
-
-print("")
-for (package in c("zip") ) {
-  if ( !{ package %in% installed.packages( ) } ) {
-        print(x = paste("Installation of ", package, sep = "") )
-        install.packages(
-            pkgs = "zip"
-          , repos = "https://cloud.r-project.org"
-        )
-    } 
-}
-
-
-# we generate a zip file with the 'program' source code
-
-if ( !dir.exists(paths = "submissions") ) {
-    dir.create(path = "submissions")
-}
-
-# we save the source code as a R file named 'program.R' :
-dump(
-    list = c("program")
-    # list = new_functions
-  , file = paste0("submissions", .Platform$file.sep, "program.R")
-)
-
-date_suffix = format(x = Sys.time( ), format = "%Y_%m_%d_%H_%M_%S")
-
-
-
-zip_program <- paste0("submissions", .Platform$file.sep, "program_", date_suffix, ".zip")
-zip::zip(zipfile= zip_program
-  , files   = paste0("submissions", .Platform$file.sep, "program.R")
-  , mode = "cherry-pick")
-
-if(dir.exists("attachement")) {
-  zip::zip_append(
-      zipfile = zip_program
-      , files= paste0("attachement", .Platform$file.sep)
-    , mode = "cherry-pick"
-  )
-}
-
-zip::zip_list(zip_program)
-print(x = zip_program)
-
-
-
-
-# # we create the associated zip file :
-# zip_program <- paste0("submissions", .Platform$file.sep, "program_", date_suffix, ".zip")
-# zip::zip(zipfile= zip_program
-#                 , files= paste0("submissions", .Platform$file.sep, "program.R")
-#                 , mode = "cherry-pick"
-#                 )
-
-# zip::zip_list(zip_program)
-# print(x = zip_program)
-
-###############################
-### Result submission mode  
-
-#  Generate a zip file with the prediction
-if ( !dir.exists(paths = "submissions") ) {
-    dir.create(path = "submissions")
-}
-
-prediction_name = "prediction.rds"
-
-## we save the estimated A matrix as a rds file named 'results.rds' :
-saveRDS(
-object = predi_list
-, file   = paste0("submissions", .Platform$file.sep, prediction_name)) 
-
-# write_rds(pred_prop, file = "prediction_hugo.rds")
-
-## we create the associated zip file :
-zip_results <- paste0("submissions", .Platform$file.sep, "results_", date_suffix, ".zip")
-zip::zipr(
-         zipfile = zip_results
-       , files   = paste0("submissions", .Platform$file.sep, c(prediction_name) )
-     )
-print(x = zip_results)
-
-sessionInfo( )
-
-###############################################################
-### How to submit the zip file? /!\ DO NOT CHANGE THIS PART ###
-###############################################################
-#
-# The code above generates the files *`r zip_program`*  and *`r zip_results`*  (the 1st one for code submission, the 2nd one for result submission).
-#
-# Submit the zip submission file on the challenge in the `My Submission` tab, fill the metadata, select the task you want to submit to and upload your submission files
-#
-# On the codalab challenge web page, The *STATUS* become :
-#   - Submitting
-#   - Submitted
-#   - Running
-#   - Finished
-#
-# When it’s finished :
-#   - You refresh the page and click on the green button 'add to leaderboard' to see your score
-#   - If enable, details for report could be downloaded by clicking on your submission
-#   - Some execution logs are available to check and or download.
-#   - Metadata are editable when you click on your submission
-#   - Leader board is updated in the `Results` tab.
-#
-
diff --git a/phase-1_2_3/starting_kit_phase1/submission_script.py b/phase-1_2_3/starting_kit_phase1/submission_script.py
deleted file mode 100644
index d6ea552..0000000
--- a/phase-1_2_3/starting_kit_phase1/submission_script.py
+++ /dev/null
@@ -1,252 +0,0 @@
-##################################################################################################
-### PLEASE only edit the program function between YOUR CODE BEGINS/ENDS HERE                   ###
-##################################################################################################
-
-
-########################################################
-### Package dependencies /!\ DO NOT CHANGE THIS PART ###
-########################################################
-import subprocess
-import sys
-import importlib
-
-def program(mix=None, ref=None, **kwargs):
-
-  ##
-  ## YOUR CODE BEGINS HERE
-  ##
-
-  required_packages = ["sklearn","pandas",'scipy']
-  install_and_import_packages(required_packages)
-  from sklearn.linear_model import LinearRegression
-
-  from attachement import additionnal_script
-  additionnal_script.useless_function()
-
-
-  def estimate_proportions(mix_df, ref_df):
-    results = []
-    for i in range(len(mix_df.columns)):
-        mix_col = mix_df.iloc[:, i]  # Select the i-th column as a Series
-        res = LinearRegression(fit_intercept=False).fit(ref_df, mix_col).coef_
-        # res, _ = scipy.optimize.nnls(ref_df.to_numpy(), mix_col.to_numpy())
-        res[res < 0] = 0
-        results.append(res)
-
-    # Normalize the results to get proportions
-    props = pandas.DataFrame([res_i / sum(res_i) for res_i in results], columns=ref_df.columns)
-    return props.T
-  
-
-  # Creation of an index, idx_feat, corresponding to the intersection of features present in the references and those present in the mixtures.
-  idx_feat = mix.index.intersection(ref.index)
-  mix_filtered = mix.loc[idx_feat, :]
-  ref_filtered = ref.loc[idx_feat, :]
-
-  prop = estimate_proportions(mix_filtered, ref_filtered)
- 
-  # Labeling of estimated proportions 
-  prop.columns = mix.columns
-
-  return prop
-  ##
-  ## YOUR CODE ENDS HERE
-  ##
-
-
-# Install and import each package
-def install_and_import_packages(required_packages):
-  for package in required_packages:
-      try:
-          globals()[package] = importlib.import_module(package)
-      except ImportError:
-          print('impossible to import, installing packages',package)
-          package_to_install = 'scikit-learn' if package == 'sklearn' else package
-          subprocess.check_call([sys.executable, "-m", "pip", "install", package_to_install])
-          globals()[package] = importlib.import_module(package)
-
-def validate_pred(pred, nb_samples=None, nb_cells=None, col_names=None):
-    error_status = 0  # 0 means no errors, 1 means "Fatal errors" and 2 means "Warning"
-    error_informations = ''
-
-    # Ensure that all sum of cells proportion approximately equal 1
-    if not numpy.allclose(numpy.sum(pred, axis=0), 1):
-        msg = "The prediction matrix does not respect the laws of proportions: the sum of each column should be equal to 1\n"
-        error_informations += msg
-        error_status = 2
-
-    # Ensure that the prediction has the correct names
-    if not set(col_names) == set(pred.index):
-        msg = f"The row names in the prediction matrix should match: {col_names}\n"
-        error_informations += msg
-        error_status = 2
-
-    # Ensure that the prediction returns the correct number of samples and number of cells
-    if pred.shape != (nb_cells, nb_samples):
-        msg = f'The prediction matrix has the dimension: {pred.shape} whereas the dimension: {(nb_cells, nb_samples)} is expected\n'
-        error_informations += msg
-        error_status = 1
-
-    if error_status == 1:
-        # The error is blocking and should therefore stop the execution
-        raise ValueError(error_informations)
-    if error_status == 2:
-        print("Warning:")
-        print(error_informations)
-
-
-##############################################################
-### Generate a prediction file /!\ DO NOT CHANGE THIS PART ###
-##############################################################
-
-# List of required packages
-required_packages = [
-  "numpy",
-  "pandas",
-  "rpy2",
-  "zipfile",
-  "inspect",
-]
-install_and_import_packages(required_packages)
-
-# from rpy2.robjects import pandas2ri
-import os
-import rpy2.robjects
-readRDS = rpy2.robjects.r['readRDS']
-saveRDS= rpy2.robjects.r["saveRDS"]
-
-from rpy2.robjects import pandas2ri
-pandas2ri.activate()
-
-
-r_code_get_rowandcolnames = '''
-get_both <- function(ref_names = "reference_fruits.rds", mat = NULL) {
-  ref_names <- readRDS(ref_names)
-  if (!is.null(mat)) {
-    return(list(  rownames(ref_names[[mat]]), colnames(ref_names[[mat]]) ))
-  } else {
-    return(list(rownames(ref_names),colnames(ref_names) ))
-  }
-}
-'''
-rpy2.robjects.r(r_code_get_rowandcolnames)
-get_both_row_col = rpy2.robjects.r['get_both']
-
-
-# # Function to convert R object to pandas DataFrame or numpy array
-def r_object_to_python(r_object,file,element_name):
-    try:
-        # Try to convert to pandas DataFrame
-        return pandas2ri.rpy2py(r_object)
-    except NotImplementedError:
-        rows, columns =get_both_row_col(file,element_name)
-        if(isinstance(columns, type (rpy2.robjects.NULL))):
-            df = pandas.DataFrame(r_object, index=rows)
-        else: 
-            columns = list(columns)
-            df = pandas.DataFrame(r_object, columns=columns, index=rows)
-        return df
-
-# Function to extract named data elements and convert appropriately
-def extract_data_element(data, file, element_name):
-    if element_name in data.names:
-        element = data.rx2(element_name)
-        return r_object_to_python(element,file,element_name)
-    return None
-
-
-dir_name = "data"+os.sep
-
-datasets_list = [filename for filename in os.listdir(dir_name) if filename.startswith("mixes")]
-
-ref_file = os.path.join(dir_name, "reference_pdac.rds")
-print("reading reference file")
-reference_data = readRDS(ref_file)
-ref_bulkRNA = extract_data_element(reference_data,ref_file, 'ref_bulkRNA') 
-ref_met = extract_data_element(reference_data,ref_file, 'ref_met')
-ref_scRNA = extract_data_element(reference_data,ref_file, 'ref_scRNA')
-
-predi_dic = {}
-for dataset_name in datasets_list :
-
-    file= os.path.join(dir_name,dataset_name)
-    mixes_data = readRDS(file)
-
-    print(f"generating prediction for dataset: {dataset_name}")
-
-    mix_rna = extract_data_element(mixes_data,file, 'mix_rna') 
-    mix_met = extract_data_element(mixes_data,file, 'mix_met')
-
-    pred_prop = program(mix_rna, ref_bulkRNA, mix_met=mix_met, ref_met=ref_met   )
-    validate_pred(pred_prop, nb_samples=mix_rna.shape[1], nb_cells=ref_bulkRNA.shape[1], col_names=ref_bulkRNA.columns)
-    predi_dic[dataset_name] = pred_prop
-
-############################### 
-### Code submission mode
-
-# we generate a zip file with the 'program' source code
-
-if not os.path.exists("submissions"):
-    os.makedirs("submissions")
-
-# we save the source code as a Python file named 'program.py':
-with open(os.path.join("submissions", "program.py"), 'w') as f:
-    f.write(inspect.getsource(program))
-
-date_suffix = pandas.Timestamp.now().strftime("%Y_%m_%d_%H_%M_%S")
-
-
-
-
-# we create the associated zip file:
-zip_program = os.path.join("submissions", f"program_{date_suffix}.zip")
-with zipfile.ZipFile(zip_program, 'w') as zipf:
-    zipf.write(os.path.join("submissions", "program.py"), arcname="program.py")
-
-
-def zipdir(path, ziph):
-    # ziph is zipfile handle
-    for root, dirs, files in os.walk(path):
-        for file in files:
-            ziph.write(os.path.join(root, file), 
-                       os.path.relpath(os.path.join(root, file), 
-                                       os.path.join(path, '..')))
-if os.path.exists("attachement"):
-    with zipfile.ZipFile(zip_program, 'a', zipfile.ZIP_DEFLATED) as zipf:
-        zipdir('attachement/', zipf)
-
-
-
-# # Check if the "attachment" directory exists
-# if os.path.exists("attachement"):
-#     # Append the contents of the "attachment" directory to the zip archive
-#     with zipfile.ZipFile(zip_program, mode="a") as zf:
-#         for root, _, files in os.walk("attachement"):
-#             for file in files:
-#                 file_path = os.path.join(root, file)
-#                 # Add file to zip while preserving directory structure
-#                 arcname = os.path.relpath(file_path, start="attachement")
-#                 zf.write(file_path, arcname)
-
-print(zip_program)
-
-###############################
-### Result submission mode  
-
-# Generate a zip file with the prediction
-if not os.path.exists("submissions"):
-    os.makedirs("submissions")
-
-prediction_name = "prediction.rds"
-
-saveRDS(rpy2.robjects.ListVector(predi_dic), os.path.join("submissions", prediction_name))
-
-
-
-# Create the associated zip file:
-zip_results = os.path.join("submissions", f"results_{date_suffix}.zip")
-with zipfile.ZipFile(zip_results, 'w') as zipf:
-    zipf.write(os.path.join("submissions", prediction_name), arcname=prediction_name)
-
-print(zip_results)
-