Skip to content

Commit

Permalink
analysis: use the ResourceManager, update the configuration files and…
Browse files Browse the repository at this point in the history
… JUnit tests for Liftover analysis, #TASK-7064, #TASK-7049
  • Loading branch information
jtarraga committed Dec 19, 2024
1 parent 36ac05b commit 180abfd
Show file tree
Hide file tree
Showing 4 changed files with 108 additions and 97 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,11 @@
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.time.StopWatch;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.commons.utils.FileUtils;
import org.opencb.opencga.analysis.tools.OpenCgaToolScopeStudy;
import org.opencb.opencga.analysis.wrappers.exomiser.ExomiserWrapperAnalysis;
import org.opencb.opencga.catalog.exceptions.CatalogException;
import org.opencb.opencga.catalog.exceptions.ResourceException;
import org.opencb.opencga.core.api.ParamConstants;
import org.opencb.opencga.core.common.TimeUtils;
import org.opencb.opencga.core.exceptions.ToolException;
Expand Down Expand Up @@ -62,6 +65,9 @@ public class LiftoverWrapperAnalysis extends OpenCgaToolScopeStudy {
private String vcfDest;
private Path resourcePath;

private org.opencb.opencga.catalog.utils.ResourceManager resourceManager = null;
private List<String> liftoverResourceIds = null;

@ToolParams
protected final LiftoverWrapperParams analysisParams = new LiftoverWrapperParams();

Expand All @@ -87,9 +93,11 @@ protected void check() throws Exception {
}
if (LIFTOVER_GRCH38.equalsIgnoreCase(analysisParams.getTargetAssembly())) {
targetAssembly = LIFTOVER_GRCH38;
liftoverResourceIds = Arrays.asList("REFERENCE_GENOME_GRCH38_FA", "REFERENCE_GENOME_GRCH37_FA");
}
if (LIFTOVER_HG38.equalsIgnoreCase(analysisParams.getTargetAssembly())) {
targetAssembly = LIFTOVER_HG38;
liftoverResourceIds = Arrays.asList("REFERENCE_GENOME_HG38_FA", "REFERENCE_GENOME_HG19_FA");
}
if (!LIFTOVER_GRCH38.equals(targetAssembly) && !LIFTOVER_HG38.equals(targetAssembly)) {
throw new ToolException("Unknown Liftover 'targetAssembly' parameter ('" + analysisParams.getTargetAssembly() + "'), "
Expand All @@ -114,6 +122,12 @@ protected void check() throws Exception {
}
vcfDest = path.toString();
}

// Check resources
resourceManager = new org.opencb.opencga.catalog.utils.ResourceManager(getOpencgaHome());
for (String liftoverResourceId : liftoverResourceIds) {
resourceManager.checkResourcePath(liftoverResourceId);
}
}

@Override
Expand All @@ -132,36 +146,14 @@ protected void run() throws ToolException, IOException {
step(CLEAN_RESOURCES_STEP, this::cleanResources);
}

/**
 * Prepares the Liftover resources in a resources folder created inside the job directory.
 *
 * NOTE(review): this span is a diff hunk rendered without +/- markers. The two signatures and the two
 * bodies below look like the removed implementation (files fetched through a ResourceManager built with a
 * hard-coded URL) followed by the added one (files copied from the path returned by
 * checkResourcePath) - confirm against the repository before relying on any single line.
 *
 * @throws IOException declared by both signatures
 * @throws ToolException thrown below when the target assembly matches none of the switch cases
 * @throws ResourceException declared only by the second signature; presumably raised by
 *                           checkResourcePath - TODO confirm
 */
protected void prepareResources() throws IOException, ToolException {
protected void prepareResources() throws IOException, ToolException, ResourceException {
// Create folder where the liftover resources will be saved (within the job dir, aka outdir)
resourcePath = Files.createDirectories(getOutDir().resolve(RESOURCES_DIRNAME));

// Identify Liftover resources to download only the required ones
// (map key: resource folder name, map value: file names to fetch from that folder)
Map<String, List<String>> mapResources = new HashMap<>();
switch (targetAssembly) {
case LIFTOVER_GRCH38: {
mapResources.put("reference-genome", Arrays.asList("Homo_sapiens.GRCh37.dna.primary_assembly.fa.gz",
"Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz"));
break;
}
case LIFTOVER_HG38: {
mapResources.put("reference-genome", Arrays.asList("hg19.fa.gz", "hg38.fa.gz"));
break;
}
default: {
throw new ToolException("Unknown Liftover 'targetAssembly' parameter ('" + analysisParams.getTargetAssembly() + "'), "
+ VALID_TARGET_ASSEMBLIES);
}
}

// Download resources and copy them to the job dir
// (this URL is temporary, it should be replaced by the resourceUrl from configuration file)
ResourceManager resourceManager = new ResourceManager(getOpencgaHome(), "http://resources.opencb.org/task-6766/");
for (Map.Entry<String, List<String>> entry : mapResources.entrySet()) {
for (String resourceName : entry.getValue()) {
java.io.File resourceFile = resourceManager.getResourceFile(entry.getKey(), resourceName);
Files.copy(resourceFile.toPath(), resourcePath.resolve(resourceFile.getName()));
}
// Copy each required resource from the installation folder into the job resources folder,
// keeping the original file name
for (String liftoverResourceId : liftoverResourceIds) {
// checkResourcePath returns the path of the resource; check() calls it with the same IDs beforehand
Path installPath = resourceManager.checkResourcePath(liftoverResourceId);
FileUtils.copyFile(installPath.toFile(), resourcePath.resolve(installPath.getFileName()).toFile());
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.opencga.TestParamConstants;
import org.opencb.opencga.analysis.clinical.ClinicalAnalysisLoadTask;
import org.opencb.opencga.analysis.resource.ResourceFetcherTool;
import org.opencb.opencga.analysis.tools.ToolRunner;
import org.opencb.opencga.analysis.variant.gwas.GwasAnalysis;
import org.opencb.opencga.analysis.variant.hrdetect.HRDetectAnalysis;
Expand All @@ -55,7 +56,7 @@
import org.opencb.opencga.analysis.wrappers.liftover.LiftoverWrapperAnalysis;
import org.opencb.opencga.catalog.db.api.SampleDBAdaptor;
import org.opencb.opencga.catalog.exceptions.CatalogException;
import org.opencb.opencga.catalog.io.IOManager;
import org.opencb.opencga.catalog.exceptions.ResourceException;
import org.opencb.opencga.catalog.managers.AnnotationSetManager;
import org.opencb.opencga.catalog.managers.CatalogManager;
import org.opencb.opencga.catalog.utils.Constants;
Expand All @@ -64,6 +65,7 @@
import org.opencb.opencga.core.api.ParamConstants;
import org.opencb.opencga.core.common.ExceptionUtils;
import org.opencb.opencga.core.common.JacksonUtils;
import org.opencb.opencga.core.config.Configuration;
import org.opencb.opencga.core.config.storage.CellBaseConfiguration;
import org.opencb.opencga.core.config.storage.StorageConfiguration;
import org.opencb.opencga.core.exceptions.ToolException;
Expand All @@ -83,6 +85,7 @@
import org.opencb.opencga.core.models.organizations.OrganizationUpdateParams;
import org.opencb.opencga.core.models.project.ProjectCreateParams;
import org.opencb.opencga.core.models.project.ProjectOrganism;
import org.opencb.opencga.core.models.resource.ResourceFetcherToolParams;
import org.opencb.opencga.core.models.sample.Sample;
import org.opencb.opencga.core.models.sample.SampleQualityControl;
import org.opencb.opencga.core.models.sample.SampleReferenceParam;
Expand All @@ -108,7 +111,6 @@
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.util.*;
import java.util.stream.Collectors;

Expand Down Expand Up @@ -1147,7 +1149,7 @@ public void testLiftoverDestinationJobDir() throws IOException, ToolException, C
Path liftOutdir = Paths.get(opencga.createTmpOutdir("_liftOutdir"));
System.out.println("Liftover outdir = " + liftOutdir);

prepareLiftoverResourcesIfLocal();
Assume.assumeTrue(areLiftoverResourcesReady());

String basename = "NA12877_S1.1k";
File file = prepareLiftoverInputFile(basename + ".vcf.gz", "biofiles");
Expand All @@ -1168,7 +1170,7 @@ public void testLiftoverDestinationVcfInputFolder() throws IOException, ToolExce
Path liftOutdir = Paths.get(opencga.createTmpOutdir("_liftOutdir"));
System.out.println("Liftover outdir = " + liftOutdir);

prepareLiftoverResourcesIfLocal();
Assume.assumeTrue(areLiftoverResourcesReady());

String basename = "NA12877_S1.1k";
File file = prepareLiftoverInputFile(basename + ".vcf.gz", "biofiles");
Expand All @@ -1190,7 +1192,7 @@ public void testLiftoverDestinationUserFolder() throws IOException, ToolExceptio
Path liftOutdir = Paths.get(opencga.createTmpOutdir("_liftOutdir"));
System.out.println("Liftover outdir = " + liftOutdir);

prepareLiftoverResourcesIfLocal();
Assume.assumeTrue(areLiftoverResourcesReady());

Path folderPath = Paths.get("custom", "folder");
File destCustomFolder = catalogManager.getFileManager().createFolder(STUDY, folderPath.toString(), true, null, QueryOptions.empty(),
Expand Down Expand Up @@ -1231,33 +1233,39 @@ private File prepareLiftoverInputFile(String filename, String folder) throws IOE
return file;
}

/**
 * Test helper that makes sure the hg19/hg38 reference genomes used by the Liftover tests are available.
 *
 * NOTE(review): this span is a diff hunk rendered without +/- markers. It appears to interleave the removed
 * helper prepareLiftoverResourcesIfLocal (copying files from a relative local data folder) with the added
 * helper areLiftoverResourcesReady (check, fetch through ResourceFetcherTool, re-check) - confirm
 * against the repository before relying on any single line.
 *
 * @return for areLiftoverResourcesReady: true when both resource checks pass, either directly or after the
 *         fetch; false when the fetch or the second check throws
 * @throws IOException declared by both signatures
 */
private void prepareLiftoverResourcesIfLocal() throws IOException {
Path resourcePath = opencga.getOpencgaHome().resolve("analysis").resolve("resources");
Path resourcesLocalPath = Paths.get("../../../data/opencga/resources/liftover").toAbsolutePath();
if (Files.exists(resourcesLocalPath)) {
if (!Files.exists(resourcePath.resolve("liftover"))) {
Files.createDirectories(resourcePath.resolve("liftover"));
}
// Copy only when the destination folder does not already hold exactly 2 entries
if (resourcePath.resolve("liftover").toFile().listFiles().length != 2) {
for (java.io.File file : resourcesLocalPath.toFile().listFiles()) {
java.io.File destFile = resourcePath.resolve("liftover").resolve(file.getName()).toFile();
System.out.println("Copy from " + file + " to " + destFile.toPath());
Files.copy(file.toPath(), destFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
}
}
private boolean areLiftoverResourcesReady() throws IOException {
// Set the resource base path and base URL on the test configuration and write it to
// conf/configuration.yml under the test OpenCGA home
// (presumably so that the ResourceManager and the fetcher tool read these values - TODO confirm)
Configuration configuration = opencga.getConfiguration();
configuration.getAnalysis().getResource().setBasePath(opencga.getOpencgaHome().resolve(ResourceManager.ANALYSIS_DIRNAME).resolve(ResourceManager.RESOURCES_DIRNAME));
configuration.getAnalysis().getResource().setBaseUrl("http://resources.opencb.org/opencb/opencga/analysis/resources/");
JacksonUtils.getDefaultObjectMapper().writerFor(Configuration.class).writeValue(opencga.getOpencgaHome().resolve("conf/configuration.yml").toFile(), configuration);

ResourceManager resourceManager = new ResourceManager(opencga.getOpencgaHome());

// First attempt: both resources already pass the check, nothing to fetch
try {
resourceManager.checkResourcePath("REFERENCE_GENOME_HG38_FA");
resourceManager.checkResourcePath("REFERENCE_GENOME_HG19_FA");
return true;
} catch (ResourceException e) {
// Not fatal: fall through to the fetch attempt below
System.out.println("First checking if Liftover resources are ready, failed. So they will be downloaded");
}
resourcesLocalPath = Paths.get("../../../data/opencga/resources/reference-genome").toAbsolutePath();
if (Files.exists(resourcesLocalPath)) {
if (!Files.exists(resourcePath.resolve("reference-genome"))) {
Files.createDirectories(resourcePath.resolve("reference-genome"));
}
// Copy only when the destination folder does not already hold exactly 4 entries
if (resourcePath.resolve("reference-genome").toFile().listFiles().length != 4) {
for (java.io.File file : resourcesLocalPath.toFile().listFiles()) {
java.io.File destFile = resourcePath.resolve("reference-genome").resolve(file.getName()).toFile();
System.out.println("Copy from " + file + " to " + destFile.toPath());
Files.copy(file.toPath(), destFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
}
}

// Second attempt: run ResourceFetcherTool, then repeat the same two checks
try {
// "REFERENCE_GENOME_HG*" is presumably treated as a wildcard over resource IDs - TODO confirm
ResourceFetcherToolParams params = new ResourceFetcherToolParams()
.setResources(Arrays.asList("REFERENCE_GENOME_HG*"));

Path fetcherOutdir = Paths.get(opencga.createTmpOutdir());
toolRunner.execute(ResourceFetcherTool.class, params, new ObjectMap(ParamConstants.STUDY_PARAM, ParamConstants.ADMIN_STUDY),
fetcherOutdir, null, false, opencga.getAdminToken());

System.out.println("fetcherOutdir = " + fetcherOutdir);

resourceManager.checkResourcePath("REFERENCE_GENOME_HG38_FA");
resourceManager.checkResourcePath("REFERENCE_GENOME_HG19_FA");
return true;
} catch (ResourceException | ToolException e) {
// Report and return false; the Liftover tests pass this result to Assume.assumeTrue
e.printStackTrace();
System.out.println("Error downloading Liftover resources via ResourceFetcherTool, so JUnit tests won't be executed");
return false;
}
}

Expand Down
85 changes: 48 additions & 37 deletions opencga-catalog/src/test/resources/configuration-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,65 +28,65 @@ analysis:
# List of resources
files:
# Exomiser resources
- id: "EXOMISER_13_1_0_HG19"
url: "exomiser/13.1.0/2109_hg19.zip"
md5: ""
path: "exomiser/13.1.0/2109_hg19"
- id: "EXOMISER_13_1_0_HG38"
url: "exomiser/13.1.0/2109_hg38.zip"
md5: ""
md5: "6d4011146705ca7a28eba9df0bfdacd2"
path: "exomiser/13.1.0/2109_hg38"
- id: "EXOMISER_13_1_0_PHENOTYPE"
url: "exomiser/13.1.0/2109_phenotype.zip"
md5: ""
md5: "197d126e1b441674f5d5979ac2027591"
path: "exomiser/13.1.0/2109_phenotype"
- id: "EXOMISER_14_0_0_HG19"
url: "exomiser/14.0.0/2402_hg19.zip"
md5: ""
path: "exomiser/2402_hg19"
- id: "EXOMISER_14_0_0_HG38"
url: "exomiser/14.0.0/2402_hg38.zip"
md5: ""
path: "exomiser/2402_hg38"
md5: "7f89dcf9221c7aa5d5471a2ffdb58303"
path: "exomiser/14.0.0/2402_hg38"
- id: "EXOMISER_14_0_0_PHENOTYPE"
url: "exomiser/14.0.0/2402_phenotype.zip"
md5: ""
md5: "3e7092c852c5f23373ccd465bf2fcd85"
path: "exomiser/14.0.0/2402_phenotype"

# Reference genomes
- id: "REFERENCE_GENOME_GRCH38_FA"
url: "reference-genome/20240812_091434/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz"
md5: ""
path: "reference-genome/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz"
url: "reference-genomes/grch38/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz"
md5: "9732aa3c64469b25b73930db9c96a89f"
path: "reference-genomes/grch38/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz"
- id: "REFERENCE_GENOME_GRCH38_FAI"
url: "reference-genome/20240812_091434/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz.fai"
md5: ""
path: "reference-genome/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz.fai"
url: "reference-genomes/grch38/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz.fai"
md5: "d527f3eb6b664020cf4d882b5820056f"
path: "reference-genomes/grch38/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz.fai"
- id: "REFERENCE_GENOME_GRCH38_GZI"
url: "reference-genome/20240812_091434/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz.gzi"
md5: ""
path: "reference-genome/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz.gzi"
url: "reference-genomes/grch38/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz.gzi"
md5: "e904772fc5dfa2d69690aafda36332a4"
path: "reference-genomes/grch38/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz.gzi"
- id: "REFERENCE_GENOME_GRCH37_FA"
url: "reference-genome/20240812_091434/Homo_sapiens.GRCh37.dna.primary_assembly.fa.gz"
md5: ""
path: "reference-genome/Homo_sapiens.GRCh37.dna.primary_assembly.fa.gz"
url: "reference-genomes/grch37/Homo_sapiens.GRCh37.dna.primary_assembly.fa.gz"
md5: "39a3fff4234a1268c937ec012b3cabb4"
path: "reference-genomes/grch37/Homo_sapiens.GRCh37.dna.primary_assembly.fa.gz"
- id: "REFERENCE_GENOME_GRCH37_FAI"
url: "reference-genome/20240812_091434/Homo_sapiens.GRCh37.dna.primary_assembly.fa.gz.fai"
md5: ""
path: "reference-genome/Homo_sapiens.GRCh37.dna.primary_assembly.fa.gz.fai"
url: "reference-genomes/grch37/Homo_sapiens.GRCh37.dna.primary_assembly.fa.gz.fai"
md5: "15d081d2bdfe37d0fff6b753a4dcfef0"
path: "reference-genomes/grch37/Homo_sapiens.GRCh37.dna.primary_assembly.fa.gz.fai"
- id: "REFERENCE_GENOME_GRCH37_GZI"
url: "reference-genome/20240812_091434/Homo_sapiens.GRCh37.dna.primary_assembly.fa.gz.gzi"
md5: ""
path: "reference-genome/Homo_sapiens.GRCh37.dna.primary_assembly.fa.gz.gzi"
url: "reference-genomes/grch37/Homo_sapiens.GRCh37.dna.primary_assembly.fa.gz.gzi"
md5: "bed234b55a30d624159e8577def7005c"
path: "reference-genomes/grch37/Homo_sapiens.GRCh37.dna.primary_assembly.fa.gz.gzi"
- id: "REFERENCE_GENOME_HG38_FA"
url: "reference-genomes/hg38/hg38.fa.gz"
md5: "56d2da608721db4d805279301ddc902d"
path: "reference-genomes/hg38/hg38.fa.gz"
- id: "REFERENCE_GENOME_HG19_FA"
url: "reference-genomes/hg19/hg19.fa.gz"
md5: "b6631a1b3c6e1a5db6ee94c21a460ae7"
path: "reference-genomes/hg19/hg19.fa.gz"

# Relatedness resources
- id: "RELATEDNESS_VARIANTS_PRUNE_IN"
url: "data/relatedness/relatedness_thresholds.tsv"
md5: "97175d1574f14fe45bae13ffe277f295"
url: "relatedness/20221026_152242/variants.prune.in"
md5: "9d04ef8199108cfa1223f5217f96d144"
path: "relatedness/variants.prune.in"
- id: "RELATEDNESS_VARIANTS_FRQ"
url: "data/ibd/prune-20241015.out"
md5: "0d7ce55d5c48870698633ca7694e95d3"
url: "relatedness/20221026_152242/variants.frq"
md5: "30c42b1a87b5c028f1d8201fd47935e6"
path: "relatedness/variants.frq"

# Docker used by OpenCGA analysis and containing external tools such as samtools, bcftools, tabix, fastqc, plink1.9, bwa and r-base
Expand All @@ -96,12 +96,14 @@ analysis:
- id: "exomiser"
version: "13.1.0"
dockerId: "exomiser/exomiser-cli:13.1.0"
resources: ["EXOMISER_13_1_0_HG19", "EXOMISER_13_1_0_HG38", "EXOMISER_13_1_0_PHENOTYPE"]
resources: ["EXOMISER_13_1_0_HG38", "EXOMISER_13_1_0_PHENOTYPE"]

- id: "exomiser"
version: "14.0.0"
defaultVersion: true
dockerId: "exomiser/exomiser-cli:14.0.0"
resources: ["EXOMISER_14_0_0_HG19", "EXOMISER_14_0_0_HG38", "EXOMISER_14_0_0_PHENOTYPE"]
resources: ["EXOMISER_14_0_0_HG38", "EXOMISER_14_0_0_PHENOTYPE"]

- id: "mutational-signature"
resources:
- "REFERENCE_GENOME_GRCH38_FA"
Expand All @@ -110,8 +112,17 @@ analysis:
- "REFERENCE_GENOME_GRCH37_FA"
- "REFERENCE_GENOME_GRCH37_FAI"
- "REFERENCE_GENOME_GRCH37_GZI"

- id: "relatedness"
resources: ["RELATEDNESS_VARIANTS_PRUNE_IN", "RELATEDNESS_VARIANTS_FRQ"]

- id: "liftover"
resources:
- "REFERENCE_GENOME_GRCH38_FA"
- "REFERENCE_GENOME_GRCH37_FA"
- "REFERENCE_GENOME_HG38_FA"
- "REFERENCE_GENOME_HG19_FA"

execution:
# Accepted values are "local", "SGE", "azure-batch", "k8s"
# see org.opencb.opencga.master.monitor.executors.ExecutorFactory
Expand Down
4 changes: 2 additions & 2 deletions opencga-core/src/main/resources/configuration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -177,10 +177,10 @@ analysis:
- "REFERENCE_GENOME_GRCH37_FA"
- "REFERENCE_GENOME_GRCH37_FAI"
- "REFERENCE_GENOME_GRCH37_GZI"
-

- id: "relatedness"
resources: ["RELATEDNESS_VARIANTS_PRUNE_IN", "RELATEDNESS_VARIANTS_FRQ"]

- id: "liftover"
resources:
- "REFERENCE_GENOME_GRCH38_FA"
Expand Down

0 comments on commit 180abfd

Please sign in to comment.