From 3fea42724d8032844954f61ead8fd88625ee83e0 Mon Sep 17 00:00:00 2001 From: Nicolas HOMBERG Date: Fri, 29 Nov 2024 15:38:32 +0100 Subject: [PATCH] ready to upload --- phase-1_2_3/automated_docker_test.sh | 4 +- phase-1_2_3/bundle/FAQ.md | 10 +- phase-1_2_3/bundle/submission.md | 6 +- phase-1_2_3/generate_bundle.sh | 23 +- phase-1_2_3/generate_data.sh | 2 + .../ingestion_program/sub_ingestion.py | 4 +- .../scoring_program/detailed_results.Rmd | 2 +- phase-1_2_3/scoring_program/scoring.R | 7 +- .../submission_script_installpkgcran.R | 230 ---------------- .../submission_script_nnlsmultimodalSource.R | 17 +- .../starting_kit/submissions/program.R | 13 + .../starting_kit/submissions/program.py | 42 --- .../starting_kit_phase1/submission_script.R | 246 ----------------- .../starting_kit_phase1/submission_script.py | 252 ------------------ 14 files changed, 52 insertions(+), 806 deletions(-) delete mode 100644 phase-1_2_3/starting_kit/submission_script_installpkgcran.R create mode 100644 phase-1_2_3/starting_kit/submissions/program.R delete mode 100644 phase-1_2_3/starting_kit/submissions/program.py delete mode 100644 phase-1_2_3/starting_kit_phase1/submission_script.R delete mode 100644 phase-1_2_3/starting_kit_phase1/submission_script.py diff --git a/phase-1_2_3/automated_docker_test.sh b/phase-1_2_3/automated_docker_test.sh index 52f1594..311b5d0 100644 --- a/phase-1_2_3/automated_docker_test.sh +++ b/phase-1_2_3/automated_docker_test.sh @@ -37,8 +37,8 @@ echo "Create submission program" cd starting_kit/ rm -rf submissions # Rscript submission_script.R >> logs -# Rscript submission_script.R -python submission_script.py +Rscript submission_script.R +# python submission_script.py cd - echo "Done" diff --git a/phase-1_2_3/bundle/FAQ.md b/phase-1_2_3/bundle/FAQ.md index b2f7724..0f7a383 100644 --- a/phase-1_2_3/bundle/FAQ.md +++ b/phase-1_2_3/bundle/FAQ.md @@ -16,20 +16,20 @@ If your submission fails on Codabench, don't panic, review the logs! 
You can acc Try interactive docker or conda environnement, it is especially useful for python user! -A conda environment is provided follow this steps to install and activate it. Retrieve the file [environment-r.yml](https://github.com/bcm-uga/hadaca3/blob/main/docker/codabench_hadaca3_pyr/environment/environment-r.yml) from github as follow: +A conda environment is provided follow this steps to install and activate it. Retrieve the file [env_final_pyr.yml](https://github.com/bcm-uga/hadaca3/blob/main/docker/codabench_hadaca_final/environment/env_final_pyr.yml) from github as follow: ``` -wget https://raw.githubusercontent.com/bcm-uga/hadaca3/main/docker/codabench_hadaca3_pyr/environment/environment-r.yml -conda env create -f environment-r.yml && conda activate h3 +wget https://raw.githubusercontent.com/bcm-uga/hadaca3/refs/heads/main/docker/codabench_hadaca_final/environment/env_final_pyr.yml +conda env create -f env_final_pyr.yml && conda activate h3_final ``` To run the docker interactively you can run with: ``` ## for the R version -sudo docker run -it -v .:/hadaca3 -w /hadaca3 hombergn/hadaca3_pyr R` and then `source("submission_script.R") +sudo docker run -it -v .:/hadaca3 -w /hadaca3 hombergn/hadaca3_final R` and then `source("submission_script.R") ## for the python version -sudo docker run -it -v .:/hadaca3 -w /hadaca3 hombergn/hadaca3_pyr python` and then `import submission_script +sudo docker run -it -v .:/hadaca3 -w /hadaca3 hombergn/hadaca3_final python` and then `import submission_script ``` Due to the usage of the super user (sudo) all files created by docker will be owned by root. In order to retrieve ownership, you can use this command: diff --git a/phase-1_2_3/bundle/submission.md b/phase-1_2_3/bundle/submission.md index 512aeb2..335ff52 100755 --- a/phase-1_2_3/bundle/submission.md +++ b/phase-1_2_3/bundle/submission.md @@ -73,8 +73,8 @@ Then follow steps 4 and 5 described above. 
``` cd starting_kit -sudo docker pull hombergn/hadaca3_pyr -sudo docker run -it -v .:/hadaca3 -w /hadaca3 hombergn/hadaca3_pyr R +sudo docker pull hombergn/hadaca3_final +sudo docker run -it -v .:/hadaca3 -w /hadaca3 hombergn/hadaca3_final R source("submission_script.R") chown -R $USER submissions #if necessary ``` @@ -82,7 +82,7 @@ chown -R $USER submissions #if necessary Alternatively, run the following commande to execute the `submission_script`: ``` -sudo docker run -v .:/hadaca3 -w /hadaca3 hombergn/hadaca3_pyr Rscript submission_script.R +sudo docker run -v .:/hadaca3 -w /hadaca3 hombergn/hadaca3_final Rscript submission_script.R ``` and regain ownership of the files generated with: diff --git a/phase-1_2_3/generate_bundle.sh b/phase-1_2_3/generate_bundle.sh index 4169e1b..de47935 100644 --- a/phase-1_2_3/generate_bundle.sh +++ b/phase-1_2_3/generate_bundle.sh @@ -30,7 +30,7 @@ mkdir starting_kit # generate baselines : rm -rf ~/projects/hadaca3/templates/tmp/ Rscript ~/projects/hadaca3/templates/generate_baselines.R Phase_1 -cp ~/projects/hadaca3/templates/tmp/* starting_kit/ +cp -R ~/projects/hadaca3/templates/tmp/* starting_kit/ # rm -rf starting_kit_phase1 @@ -38,14 +38,15 @@ mkdir starting_kit_phase1 rm -rf ~/projects/hadaca3/templates/tmp/ Rscript ~/projects/hadaca3/templates/generate_baselines.R Phase_1_only -cp ~/projects/hadaca3/templates/tmp/* starting_kit_phase1/ +cp -R ~/projects/hadaca3/templates/tmp/* starting_kit_phase1/ rm -r ~/projects/hadaca3/templates/tmp/ #### Put input data inside the bundle ! -cd starting_kit/ ; zip -FS -r ../bundle/starting_kit_phase2-3.zip * -x \*submissions\* ; cd .. ; -cd starting_kit_phase1/ ; zip -FS -r ../bundle/starting_kit_phase1.zip * -x \*submissions\* ; cd .. ; +# cd starting_kit/ ; zip -FS -r ../bundle/starting_kit_phase2-3.zip * -x \*submissions\* ; cd .. ; +# cd starting_kit_phase1/ ; zip -FS -r ../bundle/starting_kit_phase1.zip * -x \*submissions\* ; cd .. 
; + zip -FS -r -j bundle/input_data_phase2.zip input_data/ zip -FS -r -j bundle/input_data_phase3.zip input_data_final/ zip -FS -r -j bundle/input_data_phase1.zip input_data_phase1/ @@ -67,15 +68,15 @@ zip -FS -j -r bundle/ground_truth_phase1.zip ground_truth_phase1/ # ##### generate starting_kit and input_data outisde bundle.zip -# cd starting_kit_phase1/ ; zip -FS -r ../bundle/starting_kit_phase1.zip * -x \*submissions\* -x \*data\* ; cd .. ; -# cd starting_kit/ ; zip -FS -r ../bundle/starting_kit_phase2-3.zip * -x \*submissions\* -x \*data\* ; cd .. ; +cd starting_kit_phase1/ ; zip -FS -r ../bundle/starting_kit_phase1.zip * -x \*submissions\* -x \*data\* ; cd .. ; +cd starting_kit/ ; zip -FS -r ../bundle/starting_kit_phase2-3.zip * -x \*submissions\* -x \*data\* ; cd .. ; -# zip -FS -r -j input_data_phase2.zip input_data/ -# zip -FS -r -j input_data_phase3.zip input_data_final/ -# zip -FS -r -j input_data_phase1.zip input_data_phase1/ +zip -FS -r -j input_data_phase2.zip input_data/ +zip -FS -r -j input_data_phase3.zip input_data_final/ +zip -FS -r -j input_data_phase1.zip input_data_phase1/ -# cd starting_kit/ ; zip -FS -r ../starting_kit_phase2-3.zip * -x \*submissions\* ; cd .. ; -# cd starting_kit_phase1/ ; zip -FS -r ../starting_kit_phase1.zip * -x \*submissions\* ; cd .. ; +cd starting_kit/ ; zip -FS -r ../starting_kit_phase2-3.zip * -x \*submissions\* ; cd .. ; +cd starting_kit_phase1/ ; zip -FS -r ../starting_kit_phase1.zip * -x \*submissions\* ; cd .. 
; diff --git a/phase-1_2_3/generate_data.sh b/phase-1_2_3/generate_data.sh index fa06028..e8fb28a 100644 --- a/phase-1_2_3/generate_data.sh +++ b/phase-1_2_3/generate_data.sh @@ -98,6 +98,8 @@ do done +# rm -rf starting_kit +# rm -rf starting_kit_phase1 mkdir starting_kit/data/ diff --git a/phase-1_2_3/ingestion_program/sub_ingestion.py b/phase-1_2_3/ingestion_program/sub_ingestion.py index 5dbd350..2113f31 100644 --- a/phase-1_2_3/ingestion_program/sub_ingestion.py +++ b/phase-1_2_3/ingestion_program/sub_ingestion.py @@ -21,11 +21,9 @@ try: # Define the target and link name target = "../ingested_program/attachement/" - link_name = "attachement/" + link_name = "attachement" - # Create a symbolic link os.symlink(target, link_name) - # print(f"Symbolic link created: {link_name} -> {target}") except FileExistsError: # Handle the case where the symbolic link already exists os.unlink(link_name) # Remove the existing symbolic link diff --git a/phase-1_2_3/scoring_program/detailed_results.Rmd b/phase-1_2_3/scoring_program/detailed_results.Rmd index d79c2c3..6a53b00 100644 --- a/phase-1_2_3/scoring_program/detailed_results.Rmd +++ b/phase-1_2_3/scoring_program/detailed_results.Rmd @@ -1,6 +1,6 @@ --- title: "Visualize Results" -author: "Elise Amblard, Hugo Barbot, Florent Chuffart and Magali Richard" +# author: "Elise Amblard, Hugo Barbot, Florent Chuffart and Magali Richard" date: "`r Sys.Date()`" output: # prettydoc::html_pretty: # create a styles.css file and snakemake doesn't like it diff --git a/phase-1_2_3/scoring_program/scoring.R b/phase-1_2_3/scoring_program/scoring.R index eed7838..4504248 100644 --- a/phase-1_2_3/scoring_program/scoring.R +++ b/phase-1_2_3/scoring_program/scoring.R @@ -226,7 +226,7 @@ scoring_function <- function(A_real, A_pred) { else if (nrow(A_pred) > nrow(A_real) & setequal(rownames(A_real), c("basal",'classic'))) { # partial ground truth only for the in vivo dataset rmse = NA mae = NA - aitchison = eval_Aitchison(A_real, 
A_pred[rownames(A_real),]) + aitchison = NA pearson_tot = NA pearson_col = NA pearson_row = correlationP_row(A_real, A_pred[rownames(A_real),]) @@ -277,6 +277,11 @@ scoring_function <- function(A_real, A_pred) { } judge_candidate_norm = apply(judge_candidate, 2, CenterScaleNorm) + # transform scores s.t. 1 is the best score + judge_candidate_norm = 1 - judge_candidate_norm + judge_candidate_norm[,grep("pearson",colnames(judge_candidate_norm))] = 1 - judge_candidate_norm[,grep("pearson",colnames(judge_candidate_norm))] + judge_candidate_norm[,grep("spearman",colnames(judge_candidate_norm))] = 1 - judge_candidate_norm[,grep("spearman",colnames(judge_candidate_norm))] + # Average over judges with the geometric mean for the candidate of interest #score_aggreg = exp(mean(log(judge_candidate_norm[1,]),na.rm=T)) weights = c(1/3*1/2,1/3*1/2, diff --git a/phase-1_2_3/starting_kit/submission_script_installpkgcran.R b/phase-1_2_3/starting_kit/submission_script_installpkgcran.R deleted file mode 100644 index 20e5172..0000000 --- a/phase-1_2_3/starting_kit/submission_script_installpkgcran.R +++ /dev/null @@ -1,230 +0,0 @@ -################################################################################################## -### PLEASE only edit the program function between YOUR CODE BEGINS/ENDS HERE ### -################################################################################################## - -#' The function to estimate the A matrix -#' In the provided example, we use basic non-negative least squares (package "nnls"), which consists of minimizing the error term $||Mix - Ref \times Prop||^2$ with only positive entries in the prop matrix. -#' -#' @param mix a matrix of bulks (columns) and features (rows) -#' @param ref a matrix pure types (columns) and features (rows) -#' @param ... other parameters that will be ignored -#' -#' @return the estimated A matrix -#' -program = function(mix=NULL, ref=NULL, ...) 
{ - - ## - ## YOUR CODE BEGINS HERE - ## - - install.packages("beeswarm") - - # Creation of an index, idx_feat, corresponding to the intersection of features present in the references and those present in the mixtures. - idx_feat = intersect(rownames(mix), rownames(ref)) - - # Estimation of proportions - prop = apply(mix[idx_feat,], 2, function(b, A) { - tmp_prop = lm(b ~ A - 1)$coefficients # Using `-1` to remove the intercept - # tmp_prop = nnls::nnls(b=b,A=A)$x - tmp_prop = tmp_prop / sum(tmp_prop) # Sum To One - return(tmp_prop) - }, A=ref[idx_feat,]) - - # Labeling of estimated proportions - rownames(prop) = colnames(ref) - return(prop) - - ## - ## YOUR CODE ENDS HERE nnls() - ## -} - -install.packages = function (pkgs, repos="https://cloud.r-project.org", ...) { - installed_packages <- installed.packages( ) - for (package in pkgs ) { - if ( !{ package %in% installed_packages } ) { - print(x = paste("Installation of ", package, sep = "") ) - utils::install.packages( - pkgs = package, - repos = repos, - ... 
- ) - } else { - print(x = paste(package, " is installed.", sep = "") ) - } - } -} - - - -############################################################## -### Generate a prediction file /!\ DO NOT CHANGE THIS PART ### -############################################################## - - - -mixes_data = readRDS("mixes_smoothies_fruits.rds") -reference_data = readRDS("reference_fruits.rds") - -# we use the previously defined function 'program' to estimate A : -pred_prop <- program( - mix = mixes_data , - ref = reference_data -) - - - -############################################################## -### Validate the prediction /!\ DO NOT CHANGE THIS PART ### -############################################################## - -validate_pred <- function(pred, nb_samples , nb_cells,col_names ){ - - error_status = 0 # 0 means no errors, 1 means "Fatal errors" and 2 means "Warning" - error_informations = '' - - ## Ensure that all sum ofcells proportion approximately equal 1 - if (!all(sapply(colSums(pred), function(x) isTRUE(all.equal(x, 1) )))) { - msg = "The prediction matrix does not respect the laws of proportions: the sum of each columns should be equal to 1\n" - error_informations = paste(error_informations,msg) - error_status = 2 - } - - ##Ensure that the prediction have the correct names ! - if(! setequal(rownames(pred),col_names) ){ - msg = paste0( "The row names in the prediction matrix should match: ", toString(col_names),"\n") - error_informations = paste(error_informations,msg) - error_status = 2 - } - - ## Ensure that the prediction return the correct number of samples and number of cells. 
- if (nrow(pred) != nb_cells | ncol(pred) != nb_samples) { - msg= paste0('The prediction matrix has the dimention: ',toString(dim(pred))," whereas the dimention: ",toString(c(nb_cells,nb_samples))," is expected\n" ) - error_informations = paste(error_informations,msg) - error_status = 1 - } - - if(error_status == 1){ - # The error is blocking and should therefor stop the execution. - stop(error_informations) - } - if(error_status == 2){ - print("Warning: ") - warning(error_informations) - } -} - - - -validate_pred <- function(pred, nb_samples = ncol(mixes_data) , nb_cells= ncol(reference_data),col_names = colnames(reference_data) ) - - -############################### -### Code submission mode - -print("") -for (package in c("zip") ) { - if ( !{ package %in% installed.packages( ) } ) { - print(x = paste("Installation of ", package, sep = "") ) - install.packages( - pkgs = "zip" - , repos = "https://cloud.r-project.org" - ) - } -} - - -# we generate a zip file with the 'program' source code -print('') -if ( !dir.exists(paths = "submissions") ) { - dir.create(path = "submissions") -} - -# we save the source code as a R file named 'program.R' : -dump( - list = c("program") - # list = new_functions - , file = paste0("submissions", .Platform$file.sep, "program.R") -) - -date_suffix = format(x = Sys.time( ), format = "%Y_%m_%d_%H_%M_%S") - -zip_program <- paste0("submissions", .Platform$file.sep, "program_", date_suffix, ".zip") -zip::zip(zipfile= zip_program - , files = paste0("submissions", .Platform$file.sep, "program.R") - , mode = "cherry-pick") - -if(dir.exists("attachement")) { - zip::zip_append( - zipfile = zip_program - , files= paste0("attachement", .Platform$file.sep) - , mode = "cherry-pick" - ) -} - -zip::zip_list(zip_program) -print(x = zip_program) - - - - -# # we create the associated zip file : -# zip_program <- paste0("submissions", .Platform$file.sep, "program_", date_suffix, ".zip") -# zip::zip(zipfile= zip_program -# , files= paste0("submissions", 
.Platform$file.sep, "program.R") -# , mode = "cherry-pick" -# ) - -# zip::zip_list(zip_program) -# print(x = zip_program) - -############################### -### Result submission mode - -# Generate a zip file with the prediction -if ( !dir.exists(paths = "submissions") ) { - dir.create(path = "submissions") -} - -prediction_name = "prediction.rds" - -## we save the estimated A matrix as a rds file named 'results.rds' : -saveRDS( -object = pred_prop -, file = paste0("submissions", .Platform$file.sep, prediction_name) -) - -# write_rds(pred_prop, file = "prediction_hugo.rds") - -## we create the associated zip file : -zip_results <- paste0("submissions", .Platform$file.sep, "results_", date_suffix, ".zip") -zip::zipr( - zipfile = zip_results - , files = paste0("submissions", .Platform$file.sep, c(prediction_name) ) - ) -print(x = zip_results) - -sessionInfo( ) - -############################################################### -### How to submit the zip file? /!\ DO NOT CHANGE THIS PART ### -############################################################### -# -# The code above generates the files *`r zip_program`* and *`r zip_results`* (the 1st one for code submission, the 2nd one for result submission). -# -# Submit the zip submission file on the challenge in the `My Submission` tab, fill the metadata, select the task you want to submit to and upload your submission files -# -# On the codalab challenge web page, The *STATUS* become : -# - Submitting -# - Submitted -# - Running -# - Finished -# -# When it’s finished : -# - You refresh the page and click on the green button 'add to leaderboard' to see your score -# - If enable, details for report could be downloaded by clicking on your submission -# - Some execution logs are available to check and or download. -# - Metadata are editable when you click on your submission -# - Leader board is updated in the `Results` tab. 
-# - diff --git a/phase-1_2_3/starting_kit/submission_script_nnlsmultimodalSource.R b/phase-1_2_3/starting_kit/submission_script_nnlsmultimodalSource.R index 6f44ca5..9c5d86c 100644 --- a/phase-1_2_3/starting_kit/submission_script_nnlsmultimodalSource.R +++ b/phase-1_2_3/starting_kit/submission_script_nnlsmultimodalSource.R @@ -21,7 +21,7 @@ program <- function(mix_rna=NULL, ref_bulkRNA=NULL, ## YOUR CODE BEGINS HERE ## - source("attachement/link_gene_CpG.R") + source("attachement/Source_prior_known_features.R") @@ -32,6 +32,10 @@ program <- function(mix_rna=NULL, ref_bulkRNA=NULL, mix_rna = mix_rna[idx_feat,] ref_bulkRNA = ref_bulkRNA[idx_feat,] + mix_rna = mix_rna[random_choosen_features$random_choosen_genes,] + ref_bulkRNA = ref_bulkRNA[random_choosen_features$random_choosen_genes,] + + prop_rna = apply(mix_rna, 2, function(b, A) { tmp_prop = nnls::nnls(b=b, A=A)$x tmp_prop = tmp_prop / sum(tmp_prop) # Sum To One @@ -51,15 +55,8 @@ program <- function(mix_rna=NULL, ref_bulkRNA=NULL, mix_met = mix_met[idx_feat,] ref_met = ref_met[idx_feat,] - - - # select gene present mix_rna if present - if ( !( is.null(x = mix_rna) ) ) { - probes_feature = probes_feature[probes_feature$gene %in% rownames(mix_rna),] - } - - mix_met = mix_met[rownames(mix_met) %in% rownames(probes_feature),] - ref_met = ref_met[rownames(ref_met) %in% rownames(probes_feature),] + mix_met = mix_met[random_choosen_features$random_choosen_probes,] + ref_met = ref_met[random_choosen_features$random_choosen_probes,] prop_met = apply(mix_met, 2, function(b, A) { diff --git a/phase-1_2_3/starting_kit/submissions/program.R b/phase-1_2_3/starting_kit/submissions/program.R new file mode 100644 index 0000000..513588c --- /dev/null +++ b/phase-1_2_3/starting_kit/submissions/program.R @@ -0,0 +1,13 @@ +program <- +function (mix = NULL, ref = NULL, ...) 
+{ + idx_feat = intersect(rownames(mix), rownames(ref)) + prop = apply(mix[idx_feat, ], 2, function(b, A) { + tmp_prop = lm(b ~ A - 1)$coefficients + tmp_prop[tmp_prop < 0] = 0 + tmp_prop = tmp_prop/sum(tmp_prop) + return(tmp_prop) + }, A = ref[idx_feat, ]) + rownames(prop) = colnames(ref) + return(prop) +} diff --git a/phase-1_2_3/starting_kit/submissions/program.py b/phase-1_2_3/starting_kit/submissions/program.py deleted file mode 100644 index 48b2f01..0000000 --- a/phase-1_2_3/starting_kit/submissions/program.py +++ /dev/null @@ -1,42 +0,0 @@ -def program(mix=None, ref=None, **kwargs): - - ## - ## YOUR CODE BEGINS HERE - ## - - required_packages = ["sklearn","pandas",'scipy'] - install_and_import_packages(required_packages) - from sklearn.linear_model import LinearRegression - - from attachement import additionnal_script - additionnal_script.useless_function() - - - def estimate_proportions(mix_df, ref_df): - results = [] - for i in range(len(mix_df.columns)): - mix_col = mix_df.iloc[:, i] # Select the i-th column as a Series - res = LinearRegression(fit_intercept=False).fit(ref_df, mix_col).coef_ - # res, _ = scipy.optimize.nnls(ref_df.to_numpy(), mix_col.to_numpy()) - res[res < 0] = 0 - results.append(res) - - # Normalize the results to get proportions - props = pandas.DataFrame([res_i / sum(res_i) for res_i in results], columns=ref_df.columns) - return props.T - - - # Creation of an index, idx_feat, corresponding to the intersection of features present in the references and those present in the mixtures. 
- idx_feat = mix.index.intersection(ref.index) - mix_filtered = mix.loc[idx_feat, :] - ref_filtered = ref.loc[idx_feat, :] - - prop = estimate_proportions(mix_filtered, ref_filtered) - - # Labeling of estimated proportions - prop.columns = mix.columns - - return prop - ## - ## YOUR CODE ENDS HERE - ## diff --git a/phase-1_2_3/starting_kit_phase1/submission_script.R b/phase-1_2_3/starting_kit_phase1/submission_script.R deleted file mode 100644 index cc9075d..0000000 --- a/phase-1_2_3/starting_kit_phase1/submission_script.R +++ /dev/null @@ -1,246 +0,0 @@ -################################################################################################## -### PLEASE only edit the program function between YOUR CODE BEGINS/ENDS HERE ### -################################################################################################## - -#' The function to estimate the A matrix -#' In the provided example, we use basic non-negative least squares (package "nnls"), which consists of minimizing the error term $||Mix - Ref \times Prop||^2$ with only positive entries in the prop matrix. -#' -#' @param mix a matrix of bulks (columns) and features (rows) -#' @param ref a matrix pure types (columns) and features (rows) -#' @param ... other parameters that will be ignored -#' -#' @return the estimated A matrix -#' -program = function(mix=NULL, ref=NULL, ...) { - - ## - ## YOUR CODE BEGINS HERE - ## - - # Creation of an index, idx_feat, corresponding to the intersection of features present in the references and those present in the mixtures. 
- idx_feat = intersect(rownames(mix), rownames(ref)) - - # Estimation of proportions - prop = apply(mix[idx_feat,], 2, function(b, A) { - tmp_prop = lm(b ~ A - 1)$coefficients # Using `-1` to remove the intercept - # tmp_prop = nnls::nnls(b=b,A=A)$x - tmp_prop[tmp_prop < 0] = 0 - tmp_prop = tmp_prop / sum(tmp_prop) # Sum To One - return(tmp_prop) - }, A=ref[idx_feat,]) - - # Labeling of estimated proportions - rownames(prop) = colnames(ref) - return(prop) - - ## - ## YOUR CODE ENDS HERE - ## -} - - -############################################################## -### Generate a prediction file /!\ DO NOT CHANGE THIS PART ### -############################################################## - -validate_pred <- function(pred, nb_samples = ncol(mix_rna) , nb_cells= ncol(ref_rna),col_names = colnames(ref_met) ){ - - error_status = 0 # 0 means no errors, 1 means "Fatal errors" and 2 means "Warning" - error_informations = '' - - ## Ensure that all sum ofcells proportion approximately equal 1 - if (!all(sapply(colSums(pred), function(x) isTRUE(all.equal(x, 1) )))) { - msg = "The prediction matrix does not respect the laws of proportions: the sum of each columns should be equal to 1\n" - error_informations = paste(error_informations,msg) - error_status = 2 - } - - ##Ensure that the prediction have the correct names ! - if(! setequal(rownames(pred),col_names) ){ - msg = paste0( "The row names in the prediction matrix should match: ", toString(col_names),"\n") - error_informations = paste(error_informations,msg) - error_status = 2 - } - - ## Ensure that the prediction return the correct number of samples and number of cells. 
- if (nrow(pred) != nb_cells | ncol(pred) != nb_samples) { - msg= paste0('The prediction matrix has the dimention: ',toString(dim(pred))," whereas the dimention: ",toString(c(nb_cells,nb_samples))," is expected\n" ) - error_informations = paste(error_informations,msg) - error_status = 1 - } - - if(error_status == 1){ - # The error is blocking and should therefor stop the execution. - # tryCatch(message("hello\n"), message=function(e){cat("goodbye\n")}) use this here ? - stop(error_informations) - } - if(error_status == 2){ - print("Warning: ") - warning(error_informations) - } -} - -dir_name = paste0("data",.Platform$file.sep) -dataset_list = list.files(dir_name,pattern="mixes*") - -reference_data <- readRDS(file = paste0(dir_name, "reference_pdac.rds")) - - -predi_list = list() -for (dataset_name in dataset_list){ - - print(paste0("generating prediction for dataset:",toString(dataset_name) )) - - mixes_data <- readRDS(file = paste0(dir_name, dataset_name)) - - if ("mix_rna" %in% names(mixes_data)) { - mix_rna = mixes_data$mix_rna - } else { - mix_rna = mixes_data - } - if ("mix_met" %in% names(mixes_data)) { - mix_met = mixes_data$mix_met - } else { - mix_met = NULL - } - - if ("ref_bulkRNA" %in% names(reference_data)) { - ref_bulkRNA = reference_data$ref_bulkRNA - } else { - ref_bulkRNA = reference_data - } - if ("ref_met" %in% names(reference_data)) { - ref_met = reference_data$ref_met - } else { - ref_met = NULL - } - if ("ref_scRNA" %in% names(reference_data)) { - ref_scRNA = reference_data$ref_scRNA - } else { - ref_scRNA = NULL - } - - # we use the previously defined function 'program' to estimate A : - pred_prop <- program(mix_rna, ref_bulkRNA, mix_met=mix_met, ref_met=ref_met, ref_scRNA=ref_scRNA) - validate_pred(pred_prop,nb_samples = ncol(mix_rna),nb_cells = ncol(ref_bulkRNA),col_names = colnames(ref_met)) - predi_list[[dataset_name]] = pred_prop - -} - - -############################################################## -### Check the prediction /!\ DO NOT 
CHANGE THIS PART ### -############################################################## - - -############################### -### Code submission mode - - -print("") -for (package in c("zip") ) { - if ( !{ package %in% installed.packages( ) } ) { - print(x = paste("Installation of ", package, sep = "") ) - install.packages( - pkgs = "zip" - , repos = "https://cloud.r-project.org" - ) - } -} - - -# we generate a zip file with the 'program' source code - -if ( !dir.exists(paths = "submissions") ) { - dir.create(path = "submissions") -} - -# we save the source code as a R file named 'program.R' : -dump( - list = c("program") - # list = new_functions - , file = paste0("submissions", .Platform$file.sep, "program.R") -) - -date_suffix = format(x = Sys.time( ), format = "%Y_%m_%d_%H_%M_%S") - - - -zip_program <- paste0("submissions", .Platform$file.sep, "program_", date_suffix, ".zip") -zip::zip(zipfile= zip_program - , files = paste0("submissions", .Platform$file.sep, "program.R") - , mode = "cherry-pick") - -if(dir.exists("attachement")) { - zip::zip_append( - zipfile = zip_program - , files= paste0("attachement", .Platform$file.sep) - , mode = "cherry-pick" - ) -} - -zip::zip_list(zip_program) -print(x = zip_program) - - - - -# # we create the associated zip file : -# zip_program <- paste0("submissions", .Platform$file.sep, "program_", date_suffix, ".zip") -# zip::zip(zipfile= zip_program -# , files= paste0("submissions", .Platform$file.sep, "program.R") -# , mode = "cherry-pick" -# ) - -# zip::zip_list(zip_program) -# print(x = zip_program) - -############################### -### Result submission mode - -# Generate a zip file with the prediction -if ( !dir.exists(paths = "submissions") ) { - dir.create(path = "submissions") -} - -prediction_name = "prediction.rds" - -## we save the estimated A matrix as a rds file named 'results.rds' : -saveRDS( -object = predi_list -, file = paste0("submissions", .Platform$file.sep, prediction_name)) - -# write_rds(pred_prop, file = 
"prediction_hugo.rds") - -## we create the associated zip file : -zip_results <- paste0("submissions", .Platform$file.sep, "results_", date_suffix, ".zip") -zip::zipr( - zipfile = zip_results - , files = paste0("submissions", .Platform$file.sep, c(prediction_name) ) - ) -print(x = zip_results) - -sessionInfo( ) - -############################################################### -### How to submit the zip file? /!\ DO NOT CHANGE THIS PART ### -############################################################### -# -# The code above generates the files *`r zip_program`* and *`r zip_results`* (the 1st one for code submission, the 2nd one for result submission). -# -# Submit the zip submission file on the challenge in the `My Submission` tab, fill the metadata, select the task you want to submit to and upload your submission files -# -# On the codalab challenge web page, The *STATUS* become : -# - Submitting -# - Submitted -# - Running -# - Finished -# -# When it’s finished : -# - You refresh the page and click on the green button 'add to leaderboard' to see your score -# - If enable, details for report could be downloaded by clicking on your submission -# - Some execution logs are available to check and or download. -# - Metadata are editable when you click on your submission -# - Leader board is updated in the `Results` tab. 
# phase-1_2_3/starting_kit_phase1/submission_script.py
##################################################################################################
### PLEASE only edit the program function between YOUR CODE BEGINS/ENDS HERE                   ###
##################################################################################################


########################################################
### Package dependencies /!\ DO NOT CHANGE THIS PART ###
########################################################
import subprocess
import sys
import importlib


def program(mix=None, ref=None, **kwargs):
    """Estimate cell-type proportions of each mixture sample by linear regression.

    The source of this function is extracted with ``inspect.getsource`` further
    down in this script and shipped on its own as ``program.py``, so everything
    it needs must be imported or defined inside its body.

    Args:
        mix: pandas DataFrame of mixtures (features as rows, samples as columns).
        ref: pandas DataFrame of references (features as rows, cell types as
            columns).
        **kwargs: extra data passed by the caller (this script passes
            ``mix_met`` and ``ref_met``); unused by this baseline.

    Returns:
        pandas DataFrame of estimated proportions, one row per column of
        ``ref`` and one column per column of ``mix``.
    """

    ##
    ## YOUR CODE BEGINS HERE
    ##

    required_packages = ["sklearn", "pandas", 'scipy']
    install_and_import_packages(required_packages)
    from sklearn.linear_model import LinearRegression

    from attachement import additionnal_script
    additionnal_script.useless_function()

    def estimate_proportions(mix_df, ref_df):
        """Regress every mixture column on the references and normalise to proportions."""
        results = []
        for _, mix_col in mix_df.items():  # one Series per mixture sample
            res = LinearRegression(fit_intercept=False).fit(ref_df, mix_col).coef_
            # res, _ = scipy.optimize.nnls(ref_df.to_numpy(), mix_col.to_numpy())
            res[res < 0] = 0  # proportions cannot be negative

            # Normalize the result to get proportions. When every coefficient
            # was clipped to zero the sum is 0; keep the zeros instead of
            # dividing 0/0, which would fill the column with NaN.
            total = res.sum()
            results.append(res / total if total > 0 else res)

        props = pandas.DataFrame(results, columns=ref_df.columns)
        return props.T

    # Creation of an index, idx_feat, corresponding to the intersection of
    # features present in the references and those present in the mixtures.
    idx_feat = mix.index.intersection(ref.index)
    mix_filtered = mix.loc[idx_feat, :]
    ref_filtered = ref.loc[idx_feat, :]

    prop = estimate_proportions(mix_filtered, ref_filtered)

    # Labeling of estimated proportions
    prop.columns = mix.columns

    return prop
    ##
    ## YOUR CODE ENDS HERE
    ##


# Install and import each package
def install_and_import_packages(required_packages):
    """Import each named module into this module's globals, installing it if missing.

    Args:
        required_packages: iterable of importable module names. A module that
            cannot be imported is installed with pip and imported again.

    Raises:
        subprocess.CalledProcessError: if the pip installation fails.
        ImportError: if the module still cannot be imported after installation.
    """
    # Import names whose PyPI distribution is named differently.
    pip_names = {'sklearn': 'scikit-learn'}

    for package in required_packages:
        try:
            module = importlib.import_module(package)
        except ImportError:
            print('impossible to import, installing packages', package)
            package_to_install = pip_names.get(package, package)
            subprocess.check_call([sys.executable, "-m", "pip", "install", package_to_install])
            # Finders cache directory listings; refresh them so the freshly
            # installed package is visible to the running interpreter.
            importlib.invalidate_caches()
            module = importlib.import_module(package)
        globals()[package] = module


def validate_pred(pred, nb_samples=None, nb_cells=None, col_names=None):
    """Check a prediction matrix and report problems.

    Relies on the module-level name ``numpy`` being set by
    ``install_and_import_packages`` before the first call.

    Args:
        pred: prediction matrix (pandas DataFrame, cell types as rows and
            samples as columns).
        nb_samples: expected number of columns, or None to skip that check.
        nb_cells: expected number of rows, or None to skip that check.
        col_names: expected row labels of ``pred``, or None to skip that check.

    Raises:
        ValueError: if the shape of ``pred`` is not the expected one. The
            message also contains any warning collected before.
    """
    error_informations = ''
    has_warning = False
    has_fatal_error = False

    # Ensure that all sum of cells proportion approximately equal 1
    if not numpy.allclose(numpy.sum(pred, axis=0), 1):
        msg = "The prediction matrix does not respect the laws of proportions: the sum of each column should be equal to 1\n"
        error_informations += msg
        has_warning = True

    # Ensure that the prediction has the correct names
    # (skipped when no names are given: set(None) would raise TypeError)
    if col_names is not None and set(col_names) != set(pred.index):
        msg = f"The row names in the prediction matrix should match: {col_names}\n"
        error_informations += msg
        has_warning = True

    # Ensure that the prediction returns the correct number of samples and
    # number of cells; only the dimensions actually provided are enforced.
    if nb_cells is not None or nb_samples is not None:
        dims_match = len(pred.shape) == 2 and all(
            expected is None or expected == actual
            for expected, actual in zip((nb_cells, nb_samples), pred.shape)
        )
        if not dims_match:
            msg = f'The prediction matrix has the dimension: {pred.shape} whereas the dimension: {(nb_cells, nb_samples)} is expected\n'
            error_informations += msg
            has_fatal_error = True

    if has_fatal_error:
        # The error is blocking and should therefore stop the execution
        raise ValueError(error_informations)
    if has_warning:
        print("Warning:")
        print(error_informations)


##############################################################
### Generate a prediction file /!\ DO NOT CHANGE THIS PART ###
##############################################################

# List of required packages
required_packages = [
    "numpy",
    "pandas",
    "rpy2",
    "zipfile",
    "inspect",
]
install_and_import_packages(required_packages)

# from rpy2.robjects import pandas2ri
import os
import rpy2.robjects
readRDS = rpy2.robjects.r['readRDS']
saveRDS = rpy2.robjects.r["saveRDS"]

from rpy2.robjects import pandas2ri
pandas2ri.activate()


# R helper returning list(rownames, colnames) of an object stored in an RDS
# file, or of its element `mat` when one is given.
r_code_get_rowandcolnames = '''
get_both <- function(ref_names = "reference_fruits.rds", mat = NULL) {
  ref_names <- readRDS(ref_names)
  if (!is.null(mat)) {
    return(list( rownames(ref_names[[mat]]), colnames(ref_names[[mat]]) ))
  } else {
    return(list(rownames(ref_names),colnames(ref_names) ))
  }
}
'''
rpy2.robjects.r(r_code_get_rowandcolnames)
get_both_row_col = rpy2.robjects.r['get_both']


# # Function to convert R object to pandas DataFrame or numpy array
def r_object_to_python(r_object, file, element_name):
    """Convert an R object to Python, rebuilding a labelled DataFrame if needed.

    Args:
        r_object: R object obtained through rpy2.
        file: path of the RDS file the object was read from; the R helper
            re-reads it to recover the row and column names.
        element_name: name of the element inside the RDS list.

    Returns:
        The result of ``pandas2ri.rpy2py`` when that conversion is
        implemented, otherwise a pandas DataFrame labelled with the R names.
    """
    try:
        # Try to convert to pandas DataFrame
        return pandas2ri.rpy2py(r_object)
    except NotImplementedError:
        rows, columns = get_both_row_col(file, element_name)
        if isinstance(columns, type(rpy2.robjects.NULL)):
            # The R object has no column names: only label the rows.
            df = pandas.DataFrame(r_object, index=rows)
        else:
            columns = list(columns)
            df = pandas.DataFrame(r_object, columns=columns, index=rows)
        return df


# Function to extract named data elements and convert appropriately
def extract_data_element(data, file, element_name):
    """Return element ``element_name`` of the R list ``data`` as a Python object.

    Returns None when ``data`` has no element with that name.
    """
    if element_name in data.names:
        element = data.rx2(element_name)
        return r_object_to_python(element, file, element_name)
    return None


dir_name = "data" + os.sep

# Every file of the data directory whose name starts with "mixes" is a dataset.
datasets_list = [filename for filename in os.listdir(dir_name) if filename.startswith("mixes")]

ref_file = os.path.join(dir_name, "reference_pdac.rds")
print("reading reference file")
# Load the reference once; each element is None when absent from the RDS list.
reference_data = readRDS(ref_file)
ref_bulkRNA = extract_data_element(reference_data, ref_file, 'ref_bulkRNA')
ref_met = extract_data_element(reference_data, ref_file, 'ref_met')
ref_scRNA = extract_data_element(reference_data, ref_file, 'ref_scRNA')

# Run the program on every dataset and keep the predictions by file name.
predi_dic = {}
for dataset_name in datasets_list:

    file = os.path.join(dir_name, dataset_name)
    mixes_data = readRDS(file)

    print(f"generating prediction for dataset: {dataset_name}")

    mix_rna = extract_data_element(mixes_data, file, 'mix_rna')
    mix_met = extract_data_element(mixes_data, file, 'mix_met')

    pred_prop = program(mix_rna, ref_bulkRNA, mix_met=mix_met, ref_met=ref_met)
    validate_pred(pred_prop, nb_samples=mix_rna.shape[1], nb_cells=ref_bulkRNA.shape[1], col_names=ref_bulkRNA.columns)
    predi_dic[dataset_name] = pred_prop

###############################
### Code submission mode

# we generate a zip file with the 'program' source code

# exist_ok avoids the check-then-create race and makes re-runs harmless.
os.makedirs("submissions", exist_ok=True)

# we save the source code as a Python file named 'program.py'.
# Python source files are UTF-8 by default, so write it explicitly as UTF-8
# instead of relying on the platform's default encoding.
with open(os.path.join("submissions", "program.py"), 'w', encoding="utf-8") as f:
    f.write(inspect.getsource(program))

date_suffix = pandas.Timestamp.now().strftime("%Y_%m_%d_%H_%M_%S")

# we create the associated zip file:
zip_program = os.path.join("submissions", f"program_{date_suffix}.zip")
with zipfile.ZipFile(zip_program, 'w') as zipf:
    zipf.write(os.path.join("submissions", "program.py"), arcname="program.py")


def zipdir(path, ziph):
    """Add every file found under ``path`` to the open zip archive ``ziph``.

    Archive names are computed relative to the parent of ``path``, so the
    directory name itself is kept as the top-level folder in the archive.

    Args:
        path: directory to walk recursively.
        ziph: open ``zipfile.ZipFile`` handle, writable.
    """
    archive_root = os.path.join(path, '..')
    for root, _dirs, files in os.walk(path):
        for filename in files:
            full_path = os.path.join(root, filename)
            ziph.write(full_path, os.path.relpath(full_path, archive_root))


# Ship the optional "attachement" directory alongside program.py.
if os.path.exists("attachement"):
    with zipfile.ZipFile(zip_program, 'a', zipfile.ZIP_DEFLATED) as zipf:
        zipdir('attachement/', zipf)

print(zip_program)

###############################
### Result submission mode

# Generate a zip file with the prediction
os.makedirs("submissions", exist_ok=True)

prediction_name = "prediction.rds"

saveRDS(rpy2.robjects.ListVector(predi_dic), os.path.join("submissions", prediction_name))

# Create the associated zip file:
zip_results = os.path.join("submissions", f"results_{date_suffix}.zip")
with zipfile.ZipFile(zip_results, 'w') as zipf:
    zipf.write(os.path.join("submissions", prediction_name), arcname=prediction_name)

print(zip_results)