Skip to content

Commit

Permalink
feat: add a phenotypic pca as a comparison
Browse files Browse the repository at this point in the history
  • Loading branch information
zietzm committed Aug 30, 2024
1 parent b4c0e11 commit 21eb61f
Showing 1 changed file with 31 additions and 1 deletion.
32 changes: 31 additions & 1 deletion src/maxgcp/cli/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def compare_maxh(
output_file: Annotated[
Path, typer.Option("--out", help="Path to output GWAS summary statistics file")
],
):
) -> None:
"""Run MaxH on a set of GWAS summary statistics."""
logger.info("Loading phenotypic covariance matrix")
sep = "," if phenotype_covariance_file.suffix == ".csv" else "\t"
Expand Down Expand Up @@ -88,3 +88,33 @@ def compare_maxh(
maxh_weights_df = pd.DataFrame(maxh_weights, index=index, columns=columns)
maxh_weights_df.to_csv(output_file, sep="\t")
logger.info("Done")


@app.command(name="pca")
def pca(
phenotype_covariance_file: Annotated[
Path,
typer.Option("--pcov", exists=True, help="Path to phenotypic covariance file"),
],
output_file: Annotated[
Path, typer.Option("--out", help="Path to output file")
] = Path("maxgcp_pca.tsv"),
) -> None:
"""Compute PCA on a set of phenotypes."""
logger.info("Loading phenotypic covariance matrix")
sep = "," if phenotype_covariance_file.suffix == ".csv" else "\t"
phenotypic_covariance_df = pd.read_csv(
phenotype_covariance_file, sep=sep, index_col=0
)
logger.info("Computing PCA")
eigvals, eigvecs = np.linalg.eigh(phenotypic_covariance_df.values)
order = np.argsort(eigvals)[::-1]
projection = eigvecs[:, order]
logger.info("Saving PCA weights")
phenotypes = phenotypic_covariance_df.index.rename("phenotype")
projections = pd.Index(
[f"PC{i}" for i in range(1, len(phenotypes) + 1)], name="projection"
)
pca_weights_df = pd.DataFrame(projection, index=phenotypes, columns=projections)
pca_weights_df.to_csv(output_file, sep="\t")
logger.info("Done")

0 comments on commit 21eb61f

Please sign in to comment.