# t test examples
# 8/10/25

# Install (if needed) or load packages
library(tidyr)
library(dplyr)
library(ggplot2)
library(ggsignif) # geom_signif(), used for the ToothGrowth box plot below

# Independent-samples example: sleep ----
# This example uses the built-in sleep data set, which shows the effect of
# two soporific drugs (increase in hours of sleep compared to control) on
# 10 patients. For the sake of this example we will assume there are
# 20 patients in an independent design.

# Bring in the data
sleep
?sleep

# Check the data
str(sleep)
head(sleep)
summary(sleep)

# Summary by group
by(sleep$extra, sleep$group, summary)

# Check for normality within each group
tapply(sleep$extra, sleep$group, shapiro.test)

# Check for equal variance
var.test(extra ~ group, data = sleep)

# Run the t test (equal variances assumed)
t.test(extra ~ group, data = sleep, var.equal = TRUE)

# Present the results - basic
boxplot(extra ~ group, data = sleep)

# Present the results - alternative: bar chart of group means with
# standard-error bars. The SE is divided by the number of non-missing
# values so that it stays consistent with na.rm = TRUE in mean() and sd().
SLsummary <- sleep %>%
  group_by(group) %>%
  summarise(
    mean_extra = mean(extra, na.rm = TRUE),
    se_extra = sd(extra, na.rm = TRUE) / sqrt(sum(!is.na(extra)))
  )

ggplot(SLsummary, aes(x = group, y = mean_extra, fill = group)) +
  geom_col() +
  geom_errorbar(
    aes(ymin = mean_extra - se_extra, ymax = mean_extra + se_extra),
    width = 0.2
  )

# Box plot with fixed colours
ggplot(sleep, aes(y = extra, x = group)) +
  geom_boxplot(colour = "blue", fill = "green") +
  theme_classic()

# OR: box plot filled by group
ggplot(sleep, aes(y = extra, x = group, fill = group)) +
  geom_boxplot() +
  theme_classic()

# Independent-samples example: ToothGrowth ----
# Try this with the ToothGrowth data set
GP <- ToothGrowth
by(GP$len, GP$supp, summary)

# Check for normality within each supplement
tapply(GP$len, GP$supp, shapiro.test)

# Check for equal variance
var.test(len ~ supp, data = GP)

# Data is not normal so we have to use a Wilcoxon test
wilcox.test(len ~ supp, data = GP)

# Present this in a box plot with median lines
# To get the values of the median
aggregate(len ~ supp, data = ToothGrowth, FUN = median)

# ggplot box plot
M <- ggplot(ToothGrowth, aes(x = supp, y = len, fill = supp)) +
  geom_boxplot() +
  theme_classic()

# Add a significance bracket comparing the two supplements
M + geom_signif(comparisons = list(c("VC", "OJ")), map_signif_level = TRUE)

# Repeated-measures example: ToothGrowth ----
# What if the ToothGrowth data was a repeated measures design?
# Drop dose, then reshape to one column of len per supplement (OJ, VC).
# NOTE: this pairs observations by their row order within each supplement.
supp <- subset(ToothGrowth, select = -c(dose))
df <- unstack(supp, len ~ supp)

# Work out the difference in the paired data
df$diff <- df$OJ - df$VC
shapiro.test(df$diff)

# Data is normally distributed so we can use a paired t test
t.test(df$OJ, df$VC, paired = TRUE)

# Bar chart of mean len per supplement with standard-error bars
# (SE again uses the non-missing count to match na.rm = TRUE)
GP_summary <- GP %>%
  group_by(supp) %>%
  summarise(
    mean_len = mean(len, na.rm = TRUE),
    se_len = sd(len, na.rm = TRUE) / sqrt(sum(!is.na(len)))
  )

ggplot(GP_summary, aes(x = supp, y = mean_len, fill = supp)) +
  geom_col() +
  geom_errorbar(
    aes(ymin = mean_len - se_len, ymax = mean_len + se_len),
    width = 0.2
  )

# Repeated-measures example: sleep ----
# We can treat the sleep data as a repeated measures design.
# Subset and unstack, or use the method in last week's session.
# The group labels 1 and 2 become the columns X1 and X2 after unstacking.
slsub <- subset(sleep, select = -c(ID))
df <- unstack(slsub, extra ~ group)

# Calculate the difference
diff <- df$X1 - df$X2
shapiro.test(diff)

# No need for equal variance as this is a repeated measures design.
# Data is not normally distributed so use a Wilcoxon test.
# As it is a repeated measures design we can use the difference column and
# compare it to a median of 0, i.e. there is no difference.
wilcox.test(diff, mu = 0)