phyE_Analysis.Rmd

---
title: "phyE mutant analysis"
author: "Julin Maloof"
date: "August 10, 2015; updated July 11-21, 2016"
output: 
  html_document: 
    keep_md: yes
---

```{r preliminaries}
library(ggplot2)
library(lme4)
library(lmerTest)
library(multcomp)
```


# Figure 2
```{r}
# Load the measurements shared by Figure 2A and 2B.
# stringsAsFactors = TRUE keeps text columns as factors on R >= 4.0.0, where
# read.csv() no longer converts strings by default; relevel() and levels()
# further down require factors.
fig2ab <- read.csv("Fig2AB_hyps.csv", stringsAsFactors = TRUE)

summary(fig2ab)

# Make "sun" the reference (first) level of light.
fig2ab$light <- relevel(fig2ab$light, ref = "sun")

# Cross-tabulate observations by replicate and genotype.
table(fig2ab$rep, fig2ab$genotype)

# Standard error of the mean: sd(x) / sqrt(n).
#   x      numeric vector of observations
#   na.rm  if TRUE (the default), missing values are removed first, so they
#          count neither toward the standard deviation nor toward n
# Returns a single numeric value; NA if x still contains missing values.
sem <- function(x, na.rm = TRUE) {
  if (na.rm) x <- na.omit(x)
  sd(x) / sqrt(length(x))
}
               
```

## Figure 2A
```{r}
# Keep the rows whose species matches "Arabidopsis" and drop factor levels
# that are no longer present.
fig2a <- droplevels(fig2ab[grep("Arabidopsis", fig2ab$species), ])
levels(fig2a$genotype)

# Mean length for each light x genotype cell (rows = light, cols = genotype).
fig2a.mean <- with(fig2a, tapply(length, list(light, genotype), mean, na.rm = TRUE))
fig2a.mean

# Standard error for the same cells. (The earlier placeholder that filled
# this variable with means before overwriting it has been removed.)
fig2a.sem <- with(fig2a, tapply(length, list(light, genotype), sem))

# Grouped bar plot of the means with +/- 1 SEM error bars.
par(las = 3, mar = c(6, 4, 4, 2) + 0.1)
x <- barplot(fig2a.mean, beside = TRUE, col = c("grey80", "black"),
             # na.rm: a cell with an undefined SEM must not break the axis limit
             ylim = c(0, max(fig2a.mean + fig2a.sem, na.rm = TRUE)),
             ylab = "hyp length (mm)")
# x holds the bar midpoints returned by barplot(); code = 3 draws a flat cap
# on both ends of each segment.
arrows(x, fig2a.mean + fig2a.sem, x, fig2a.mean - fig2a.sem,
       code = 3, angle = 90, length = .05)
```

## Figure 2B
```{r}
# Keep the rows whose species contains the literal text "S.lyc" (fixed = TRUE,
# so the dot is not a regex wildcard) and drop unused factor levels.
fig2b <- droplevels(fig2ab[grep("S.lyc", fig2ab$species, fixed = TRUE), ])
levels(fig2b$genotype)

# Mean length for each light x genotype cell (rows = light, cols = genotype).
fig2b.mean <- with(fig2b, tapply(length, list(light, genotype), mean, na.rm = TRUE))
fig2b.mean

# Standard error for the same cells. (The earlier placeholder that filled
# this variable with means before overwriting it has been removed.)
fig2b.sem <- with(fig2b, tapply(length, list(light, genotype), sem))

# Grouped bar plot of the means with +/- 1 SEM error bars.
par(las = 3, mar = c(6, 4, 4, 2) + 0.1)
x <- barplot(fig2b.mean, beside = TRUE, col = c("grey80", "black"),
             # na.rm: a cell with an undefined SEM must not break the axis limit
             ylim = c(0, max(fig2b.mean + fig2b.sem, na.rm = TRUE)),
             ylab = "hyp length (mm)")
# x holds the bar midpoints returned by barplot(); code = 3 draws a flat cap
# on both ends of each segment.
arrows(x, fig2b.mean + fig2b.sem, x, fig2b.mean - fig2b.sem,
       code = 3, angle = 90, length = .05)
```

## Figure 2AB t-tests

```{r}
# One t-test of length between the light levels per genotype; the p-values are
# then corrected with p.adjust()'s default method (Holm).
# levels(factor(...)) works whether genotype is stored as factor or character.
p.adjust(vapply(levels(factor(fig2ab$genotype)),
                function(gt) {
  # Select rows by exact genotype match. A regex/substring match (grep) would
  # also pull in any genotype whose name merely contains gt and would
  # misinterpret regex metacharacters in genotype names.
  t.test(length ~ light, data = fig2ab, subset = genotype %in% gt)$p.value
}, numeric(1)))
```

## Figure 2C

```{r}
# Load the Figure 2C data. stringsAsFactors = TRUE keeps text columns as
# factors on R >= 4.0.0 so that relevel() and levels() below work.
fig2c <- read.csv("Fig2C_tomato_int.csv", stringsAsFactors = TRUE)
summary(fig2c)
# Make "sun" the reference (first) level of treatment.
fig2c$treatment <- relevel(fig2c$treatment, ref = "sun")

# Mean internode2 for each treatment x genotype cell.
fig2c.mean <- with(fig2c, tapply(internode2, list(treatment, genotype), mean, na.rm = TRUE))
fig2c.mean

# Standard error for the same cells. (The earlier placeholder that filled
# this variable with means before overwriting it has been removed.)
fig2c.sem <- with(fig2c, tapply(internode2, list(treatment, genotype), sem))

# Grouped bar plot of the means with +/- 1 SEM error bars.
par(las = 3, mar = c(6, 4, 4, 2) + 0.1)
# NOTE(review): the y label still says "hyp length" although the plotted
# variable is internode2 -- looks copied from Figure 2A/B; confirm the
# intended label and units before publishing.
x <- barplot(fig2c.mean, beside = TRUE, col = c("grey80", "black"),
             # na.rm: a cell with an undefined SEM must not break the axis limit
             ylim = c(0, max(fig2c.mean + fig2c.sem, na.rm = TRUE)),
             ylab = "hyp length (mm)")
arrows(x, fig2c.mean + fig2c.sem, x, fig2c.mean - fig2c.sem,
       code = 3, angle = 90, length = .05)

# One t-test of internode2 between the treatment levels per genotype; the
# p-values are then corrected with p.adjust()'s default method (Holm).
# levels(factor(...)) works whether genotype is stored as factor or character.
p.adjust(vapply(levels(factor(fig2c$genotype)),
                function(gt) {
  # Select rows by exact genotype match. A regex/substring match (grep) would
  # also pull in any genotype whose name merely contains gt (for example a
  # single mutant name embedded in a double mutant name).
  t.test(internode2 ~ treatment, data = fig2c, subset = genotype %in% gt)$p.value
}, numeric(1)))

```


# Figure 4

## Figure 4 data and analysis
```{r}
# Load the Figure 4 measurements. stringsAsFactors = TRUE keeps text columns
# as factors on R >= 4.0.0 so that relevel() and droplevels() below work.
data <- read.csv("figure4phyE.csv", stringsAsFactors = TRUE)
summary(data)
# "sun" becomes the reference level; day and flat are treated as categorical.
data$treatment <- relevel(data$treatment, ref = "sun")
data$day <- factor(data$day)
data$flat <- factor(data$flat)
data$epi.int12 <- data$epi + data$int1 + data$int2 #analyze overall stem elongation
# Exclude two genotypes from this analysis. %in% (unlike ==) never yields NA,
# so rows with a missing genotype do not turn into all-NA rows here.
data <- droplevels(data[!(data$genotype %in% c("phyEami3", "phyB1/B2")), ])

# Full factorial fixed effects with a random intercept for flat.
lmer1 <- lmer(epi.int12 ~ genotype*treatment*day + (1|flat), data = data)
summary(lmer1)
```

## Figure 4 get model predictions
```{r}
# a bit of messing around to get a prediction data frame that has the pvalues...
fig4.pred.df <- as.data.frame(summary(lmer1)$coefficients)
fig4.pred.df$coefname <- row.names(fig4.pred.df)
fig4.pred.df$genotype <- sapply(fig4.pred.df$coefname,function(x) {
  ifelse(grepl("genotype",x),
         regmatches(x,regexpr("(phyB1)|(phyB1/B2)|(phyEami7)|(phyB2)",x)),
         "Moneymaker")
})
fig4.pred.df$day <- sapply(fig4.pred.df$coefname,function(x) {
  ifelse(grepl("day",x),
         regmatches(x,regexpr("28|35",x)),
         "21")
})
fig4.pred.df$treatment <- with(fig4.pred.df,ifelse(grepl("treatment",coefname),"shade","sun"))
fig4.pred.df$length <- predict(lmer1,fig4.pred.df,re.form=NA)
```

## Figure 4 plot
```{r}
# Rename the SE and p-value columns by name rather than by position, so the
# rename cannot silently hit the wrong column if the coefficient table layout
# differs.
names(fig4.pred.df)[names(fig4.pred.df) == "Std. Error"] <- "SE"
names(fig4.pred.df)[names(fig4.pred.df) == "Pr(>|t|)"] <- "p.value"
fig4.pred.df$treatment <- factor(fig4.pred.df$treatment, levels = c("sun", "shade"))
fig4.pred.df$genotype <- factor(fig4.pred.df$genotype,
                                levels = c("Moneymaker", "phyB1", "phyB2", "phyEami7"),
                                labels = c("Moneymaker", "phyB1", "phyB2", "PHYE-amiRNA"))
fig4.pred.df$p.value[grepl("^day", fig4.pred.df$coefname)] <- NA # we do not care about the day effect per se
# Significance stars; later assignments overwrite earlier ones, so the most
# stringent threshold that is met wins.
fig4.pred.df$p.value.txt <- NA
fig4.pred.df$p.value.txt[fig4.pred.df$p.value < 0.05] <- "*"
fig4.pred.df$p.value.txt[fig4.pred.df$p.value < 0.01] <- "**"
fig4.pred.df$p.value.txt[fig4.pred.df$p.value < 0.001] <- "***"

# Bar-plot version: genotypes on the x axis, one facet per day.
# ymin is spelled out instead of relying on the base-graphics alias `min`.
pl <- ggplot(fig4.pred.df, aes(x = genotype, y = length, fill = treatment,
                               ymax = length + SE, ymin = length - SE))
pl <- pl + geom_bar(position = "dodge", stat = "identity")
pl <- pl + facet_grid(. ~ day, labeller = labeller(day = c("21" = "Week 3", "28" = "Week 4", "35" = "Week 5")))
pl <- pl + theme_bw() + theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))
pl <- pl + ylab("length (mm)")
pl <- pl + geom_errorbar(position = position_dodge(width = .9), width = .5)
pl <- pl + geom_text(aes(label = p.value.txt, y = length + SE + 2), position = position_dodge(width = 0.9))
pl <- pl + scale_fill_manual(values = c("sun" = "gray80", "shade" = "black"))
pl
# Pass the plot explicitly so the saved file does not depend on which plot
# happened to be displayed last.
ggsave("fig4.pdf", plot = pl, width = 7, height = 4)


# Line-plot version: days on the x axis, one facet per genotype.
pl <- ggplot(fig4.pred.df, aes(x = day, y = length, color = treatment,
                               ymax = length + SE, ymin = length - SE, group = treatment))
pl <- pl + geom_line()
pl <- pl + facet_grid(. ~ genotype)
pl <- pl + theme_bw()
pl <- pl + ylab("length (mm)")
pl <- pl + geom_errorbar(width = .5)
pl <- pl + geom_text(aes(label = p.value.txt, y = length + SE + 2))
pl
ggsave("fig4.alt.pdf", plot = pl, width = 7, height = 4)


```


# Figure 6

```{r, results='hide'}
# Load the Figure 6 measurements. stringsAsFactors = TRUE keeps text columns
# as factors on R >= 4.0.0 so that relevel() below works.
data <- read.csv("phyintpetbothreps.csv", stringsAsFactors = TRUE)
head(data)
summary(data)
# Drop every column whose name contains "X".
data <- data[, !grepl("X", colnames(data))] # get rid of time stamp columns
summary(data)
data$treatment <- relevel(data$treatment, ref = "sun")
#We get into trouble later for column names that start with a number, so...
# Prefix genotype codes that start with 1 or 2 with "B". The character class
# is [12]; the previous [1,2] also matched a leading comma.
data$genotype <- sub("^([12])", "B\\1", data$genotype)
# Fix the level order explicitly; genotype values not listed here become NA.
data$genotype <- factor(data$genotype, levels = c("MM", "A", "B1", "B2", "E3", "E7", "B12", "B1E3", "B1E7", "B2E3", "B2E7", "TR3", "TR7", "QD3", "QD7"))
data$rep <- factor(data$rep)
data$start <- factor(data$start)
# Map each start code to a shelf position: 1 and 4 -> top, 2 and 5 -> middle,
# 3 and 6 -> bottom.
data$shelf <- sapply(as.character(data$start), switch,
                     "1" = "top",
                     "2" = "middle",
                     "3" = "bottom",
                     "4" = "top",
                     "5" = "middle",
                     "6" = "bottom")
```

## Focus on E7

To focus only on the E7 lines, run the following chunk.  Note: results are similar between the E3 and E7 lines, but our qRT-PCR support for RNA reduction is not as clear for E3.
```{r, eval=TRUE}
# Remove every row whose genotype label contains a "3", then discard the
# factor levels left without observations.
data <- droplevels(data[!grepl("3", data$genotype), ])
```

## epi+int1+int2

```{r}
# Response: summed length of epi + int1 + int2.
data$epi.int12 <- data$epi + data$int1 + data$int2
# Random-effect selection. All models share the fixed effects
# genotype*treatment and differ only in their random intercepts:
#   _1: rep + shelf + start   (largest model)
#   _2: rep + start           (shelf removed)
#   _3: start                 (rep removed from _2)
#   _4: rep                   (start removed from _2)
# Each anova() call compares _2 against a model that differs by one random
# term; the trailing comment records the decision that was taken.
lmer.epi.int12_1 <- lmer(epi.int12 ~ genotype*treatment + (1|rep) + (1|shelf) + (1|start), data=data)
lmer.epi.int12_2 <- lmer(epi.int12 ~ genotype*treatment + (1|rep) + (1|start), data=data)
anova(lmer.epi.int12_1,lmer.epi.int12_2) # no preference for the model with shelf; drop shelf
lmer.epi.int12_3 <- lmer(epi.int12 ~ genotype*treatment + (1|start), data=data)
anova(lmer.epi.int12_2,lmer.epi.int12_3) # keep rep
lmer.epi.int12_4 <- lmer(epi.int12 ~ genotype*treatment + (1|rep), data=data)
anova(lmer.epi.int12_2,lmer.epi.int12_4) # keep start
summary(lmer.epi.int12_2) # final model: random intercepts for rep and start
```

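Get predictions from the model for plotting.  First create a data frame to hold the results.  Then use `predict()` with `re.form=NA` to get the fixed-effect (population-level) predictions, and finally extract the standard errors and p-values from the model's coefficient table.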
```{r}
# One row per genotype x treatment combination, ordered as: all genotypes for
# the first treatment level, then all genotypes for the next one. This is the
# same order as the fixed-effect coefficients of genotype*treatment, which the
# row-wise SE and p-value assignments below rely on.
# stringsAsFactors = TRUE keeps both columns as factors on R >= 4.0.0 (code
# further down calls relevel() on treatment).
epi.int12.results <- data.frame(
  genotype = rep(levels(data$genotype), nlevels(data$treatment)),
  treatment = rep(levels(data$treatment), each = nlevels(data$genotype)),
  stringsAsFactors = TRUE
)
# re.form = NA: predict without conditioning on any random effect.
epi.int12.results$epi.int12 <- predict(lmer.epi.int12_2, epi.int12.results, re.form = NA)
head(epi.int12.results)

# Extract the coefficient table once instead of re-running summary() for
# every column.
epi.int12.coefs <- summary(lmer.epi.int12_2)$coefficients

# Error-bar limits: prediction -/+ the standard error of the coefficient in
# the same row position.
epi.int12.results$sem.low <- epi.int12.results$epi.int12 - epi.int12.coefs[, "Std. Error"]
epi.int12.results$sem.high <- epi.int12.results$epi.int12 + epi.int12.coefs[, "Std. Error"]

#add p-values
epi.int12.results$p.value <- epi.int12.coefs[, "Pr(>|t|)"]

# add character representation of significance
# (later assignments overwrite earlier ones, so the most stringent threshold
# that is met wins)
epi.int12.results$p.value.txt <- NA
epi.int12.results$p.value.txt[epi.int12.results$p.value < 0.05] <- "*"
epi.int12.results$p.value.txt[epi.int12.results$p.value < 0.01] <- "**"
epi.int12.results$p.value.txt[epi.int12.results$p.value < 0.001] <- "***"
```

Plot the predictions with their error bars and significance markers.
```{r}
# Put "sun" first. factor() makes this work whether treatment arrives as a
# character vector (data.frame() default on R >= 4.0.0) or as a factor;
# relevel() alone errors on character input.
epi.int12.results$treatment <- relevel(factor(epi.int12.results$treatment), ref = "sun")
# Fix the genotype order and replace the short codes with display labels.
epi.int12.results$genotype <- factor(epi.int12.results$genotype,
                                levels = c("MM", "A", "B1", "B2", "E3", "E7", "B12", "B1E3", "B1E7", "B2E3", "B2E7", "TR3", "TR7", "QD3", "QD7"),
                                labels = unlist(strsplit("MoneyMaker, phyA, phyB1, phyB2, phyE-3, phyE-7, phyB1/phyB2, phyB1/phyE-3, phyB1/phyE-7, phyB2/phyE-3, phyB2/phyE-7, phyB1/phyB2/phyE-3, phyB1/phyB2/phyE-7, phyA/phyB1/phyB2/phyE-3, phyA/phyB1/phyB2/phyE-7", split = ", ")))
# Dodged bars per genotype, filled by treatment, with error bars and
# significance stars placed 2 units above the upper error limit.
pl <- ggplot(epi.int12.results, aes(x = genotype, fill = treatment, y = epi.int12))
pl <- pl + geom_bar(stat = "identity", position = "dodge")
pl <- pl + geom_errorbar(aes(ymin = sem.low, ymax = sem.high), position = position_dodge(width = .9), width = .5)
pl <- pl + scale_fill_manual(values = c("skyblue", "darkred")) # feel free to change
pl <- pl + theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))
pl <- pl + ylab("length (mm)")
pl + geom_text(aes(label = p.value.txt, y = sem.high + 2), position = position_dodge(width = 0.9))

```

### multiple comparisons
```{r}
# Build a contrast matrix from textual comparisons.
#   comparisons  character vector; each element names two coefficients
#                separated by "-", e.g. "B1E7 - B1"
#   contr        matrix whose column names are the available coefficient names
#                (only its column count and column names are used)
# Returns a numeric matrix with one row per comparison (row names = the
# comparison strings) and the columns of `contr`; in each row the coefficient
# named before the "-" is set to 1, the one after it to -1, all others to 0.
make.contr <- function(comparisons, contr) {
  result <- matrix(0,
                   nrow = length(comparisons),
                   ncol = ncol(contr),
                   dimnames = list(comparisons, colnames(contr)))
  for (i in seq_along(comparisons)) {
    label <- comparisons[i]
    # Split on the minus sign and strip the surrounding blanks.
    pair <- trimws(strsplit(label, split = "-")[[1]])
    result[label, pair] <- c(1, -1)
  }
  result
}

# Template with zero rows and one column per fixed-effect coefficient. The
# "genotype" and "treatment" prefixes are stripped from the coefficient names
# so that comparisons can be written with short names such as "B1E7:shade".
coef.labels <- names(fixef(lmer.epi.int12_2))
coef.labels <- sub("treatment", "", sub("genotype", "", coef.labels))
contr <- matrix(nrow = 0, ncol = length(coef.labels),
                dimnames = list(NULL, coef.labels))

# Each entry becomes one contrast row: +1 for the coefficient left of the
# minus sign, -1 for the one right of it. The first four entries use the
# genotype coefficients, the last four the genotype:shade coefficients.
comparisons <- c(
  "B1E7 - B1",
  "B2E7 - B2",
  "TR7 - B12",
  "QD7 - TR7",
  "B1E7:shade - B1:shade",
  "B2E7:shade - B2:shade",
  "TR7:shade - B12:shade",
  "QD7:shade - TR7:shade"
)

contr <- make.contr(comparisons = comparisons, contr)

# Test all contrasts jointly, with Holm-adjusted p-values.
epi.int12.glht <- glht(lmer.epi.int12_2, linfct = contr)
summary(epi.int12.glht, test = adjusted("holm"))
```