Skip to content

Commit

Permalink
smk params for adjusting by marker, lots of yapf formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
tomsasani committed Jan 4, 2024
1 parent b3b0030 commit 36fffe4
Show file tree
Hide file tree
Showing 7 changed files with 163 additions and 129 deletions.
84 changes: 44 additions & 40 deletions ihd/plot_ihd_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@

plt.rc("font", size=18)

def main(args):

def main(args):
# read in results of IHD scan and validate with pandera
results = pd.read_csv(args.results)
IHDResultSchema.validate(results)
Expand All @@ -19,26 +19,33 @@ def main(args):
MarkerMetadataSchema.validate(markers)

results_merged = results.merge(markers, on="marker").astype({"chromosome": str})

chr_pref = any(["chr" in c for c in results_merged["chromosome"].unique()])
if chr_pref:
results_merged["chromosome"] = results_merged['chromosome'].apply(lambda c: int(c.split("chr")[1]))
results_merged = results_merged[results_merged["chromosome"] != "X"]
results_merged["chromosome"] = results_merged["chromosome"].apply(
lambda c: int(c.split("chr")[1])
)

pctile_label = f"95th_percentile"
results_merged = results_merged[~results_merged["chromosome"].isin(["Y", "M"])]

# get significant markers
signif = results_merged[results_merged["Distance"] >= results_merged[pctile_label]]
pctile_label = f"95th_percentile"

chrom_order = list(map(str, range(1, 23)))
chrom_order.append("X")
chrom_order_idx = dict(zip(chrom_order, range(len(chrom_order))))
results_merged["sort_idx"] = results_merged["chromosome"].apply(lambda c: chrom_order_idx[c])
results_merged.sort_values("sort_idx", inplace=True)
results_merged["sort_idx"] = results_merged["chromosome"].apply(
lambda c: chrom_order_idx[c]
)
results_merged.sort_values(["sort_idx", args.colname], inplace=True)

results_merged["is_significant"] = results_merged["Distance"] >= results_merged[pctile_label]
results_merged["ec"] = results_merged["is_significant"].apply(lambda s: "k" if s else "w")
results_merged["lw"] = results_merged["is_significant"].apply(lambda s: 1 if s else 0.5)
results_merged["is_significant"] = (
results_merged["Distance"] >= results_merged[pctile_label]
)
results_merged["ec"] = results_merged["is_significant"].apply(
lambda s: "k" if s else "w"
)
results_merged["lw"] = results_merged["is_significant"].apply(
lambda s: 1 if s else 0.5
)

# plot manhattan
f, ax = plt.subplots(figsize=(16, 6))
Expand All @@ -49,8 +56,7 @@ def main(args):
y=max_threshold_dist * args.scale,
ls=":",
c="grey",
label="Genome-wide significance threshold " +
r"$\left(p = 0.05\right)$",
label="Genome-wide significance threshold " + r"$\left(p = 0.05\right)$",
lw=2,
)

Expand All @@ -60,10 +66,9 @@ def main(args):
colors = ["#E6803C", "#398D84"]

for i, (
chrom,
chrom_df,
chrom,
chrom_df,
) in enumerate(results_merged.groupby("chromosome", sort=False)):

color_idx = i % 2
xvals = chrom_df[args.colname].values + previous_max
yvals = chrom_df["Distance"].values * args.scale
Expand All @@ -81,19 +86,22 @@ def main(args):
xtick_positions.append(np.median(xvals))
xticks.append(chrom)


ax.set_xticks(xtick_positions)
ax.set_xticklabels(xticks)

n_yticks = 6
max_ypos = np.max([
np.max(results_merged["Distance"].values * args.scale),
max_threshold_dist,
])
min_ypos = np.min([
np.min(results_merged["Distance"].values * args.scale),
max_threshold_dist,
])
max_ypos = np.max(
[
np.max(results_merged["Distance"].values * args.scale),
max_threshold_dist,
]
)
min_ypos = np.min(
[
np.min(results_merged["Distance"].values * args.scale),
max_threshold_dist,
]
)

ytick_pos = np.linspace(min_ypos * 0.95, max_ypos * 1.05, n_yticks)
ytick_labs = [f"{yval:.1e}" for yval in ytick_pos]
Expand All @@ -102,11 +110,11 @@ def main(args):
ax.set_yticklabels(ytick_labs)

# change all spines
for axis in ['top','bottom','left','right']:
for axis in ["top", "bottom", "left", "right"]:
ax.spines[axis].set_linewidth(1.5)

# increase tick width
ax.tick_params(width=2.)
ax.tick_params(width=2.0)

sns.despine(ax=ax, top=True, right=True)
ax.set_xlabel("Chromosome")
Expand All @@ -125,8 +133,7 @@ def main(args):
y=max_threshold_dist * args.scale,
ls=":",
c="grey",
label="Genome-wide significance threshold " +
r"$\left(p = 0.05\right)$",
label="Genome-wide significance threshold " + r"$\left(p = 0.05\right)$",
lw=2,
)

Expand All @@ -148,30 +155,27 @@ def main(args):

f.savefig(args.out, dpi=300)


if __name__ == "__main__":
p = argparse.ArgumentParser()
p.add_argument(
"--results",
help=
"Output of IHD scan, with an entry for every genotyped marker, in CSV format.",
help="Output of IHD scan, with an entry for every genotyped marker, in CSV format.",
)
p.add_argument(
"--markers",
help=
"File containing metadata (cM position, Mb position, or both) about each genotyped marker.",
help="File containing metadata (cM position, Mb position, or both) about each genotyped marker.",
)
p.add_argument(
"--out",
type=str,
default="ihd.png",
help=
"Name of output plot. Default is 'ihd.png'",
help="Name of output plot. Default is 'ihd.png'",
)
p.add_argument(
"-colname",
default="Mb",
help=
"Name of the column in `--markers` that denotes the position you which to plot on the x-axis of the Manhattan plot. Default is 'Mb'",
help="Name of the column in `--markers` that denotes the position you which to plot on the x-axis of the Manhattan plot. Default is 'Mb'",
)
p.add_argument(
"-chrom",
Expand All @@ -182,7 +186,7 @@ def main(args):
"-scale",
type=int,
default=1,
help="""Scale the cosine distance values by the specified amount to make visualization a bit easier on the y-axis. Default is 1000."""
help="""Scale the cosine distance values by the specified amount to make visualization a bit easier on the y-axis. Default is 1000.""",
)
args = p.parse_args()
main(args)
main(args)
Loading

0 comments on commit 36fffe4

Please sign in to comment.