#t test examples
#8/10/25

#install or load packages 
library(tidyr)
library(dplyr)
library(ggplot2)
library(ggsignif)

#This example uses the built-in sleep dataset, which shows the
#effect of two soporific drugs (increase in hours of sleep
#compared to control) on 10 patients.
#For the sake of this example we will assume there are
#20 patients in an independent design

#bring in the data: typing the name of the data set prints every row
sleep

#open the help page that describes the data set
help("sleep")

#check the data: column types, first few rows, overall summary
str(sleep)
head(sleep)
summary(sleep)
#to get a summary by group (one summary of `extra` per level of `group`)
by(data = sleep$extra, INDICES = sleep$group, FUN = summary)

#check for normality (Shapiro-Wilk test run separately on each group)
with(sleep, tapply(extra, group, shapiro.test))

#check for equal variance (F test comparing the two group variances)
#the formula + data= form labels the output "extra by group"
#rather than "sleep$extra by sleep$group"
var.test(extra ~ group, data = sleep)

#Run the t test
#var.equal = TRUE requests the pooled-variance (Student) t test;
#the default, var.equal = FALSE, would run the Welch test instead
t.test(extra ~ group, data = sleep, var.equal = TRUE)

#present the results- basic
#with data= the axes are labelled "group" and "extra"
boxplot(extra ~ group, data = sleep)

#present the results-alternative
#bar chart of the group means with +/- 1 standard error bars

#summarise each group: mean and standard error of the mean.
#The SE divides by the number of non-missing values so that it agrees
#with the na.rm = TRUE used for the mean and sd; n() counts every row
#in the group, including rows where extra is NA
SLsummary <- sleep %>%
  group_by(group) %>%
  summarise(
    mean_extra = mean(extra, na.rm = TRUE),
    se_extra = sd(extra, na.rm = TRUE) / sqrt(sum(!is.na(extra)))
  )

#one bar per group, error bars span mean - SE to mean + SE
ggplot(SLsummary, aes(x = group, y = mean_extra, fill = group)) +
  geom_col() +
  geom_errorbar(
    aes(ymin = mean_extra - se_extra, ymax = mean_extra + se_extra),
    width = 0.2
  )

#boxplot of the raw data with fixed colours (blue outline, green fill)
ggplot(data = sleep, mapping = aes(x = group, y = extra)) +
  geom_boxplot(fill = "green", colour = "blue") +
  theme_classic()

#OR

#map the fill colour to the group instead of fixing it
ggplot(data = sleep, mapping = aes(x = group, y = extra, fill = group)) +
  geom_boxplot() +
  theme_classic()


#Try this with the ToothGrowth data set

#work on a copy of the built-in data set
GP <- ToothGrowth

#summary of tooth length (len) for each supplement type (supp)
by(GP$len, GP$supp, summary)

#check for normality (Shapiro-Wilk test run separately on each supplement)
with(GP, tapply(len, supp, shapiro.test))

#check for equal variance
#the formula + data= form labels the output "len by supp"
#rather than "GP$len by GP$supp"
var.test(len ~ supp, data = GP)

#data is not normal so have to use a wilcoxon test

wilcox.test(len ~ supp, data = GP)

#present this in a boxplot with median lines

#to get the values of the median
#(data= keeps the result columns named "supp" and "len")
aggregate(len ~ supp, data = ToothGrowth, FUN = median)
#ggplot boxplot
M <- ggplot(ToothGrowth, aes(x = supp, y = len, fill = supp)) +
  geom_boxplot() +
  theme_classic()
#add a significance bracket comparing the two supplements
#geom_signif() comes from the ggsignif package, loaded at the top of the script
M + geom_signif(
  comparisons = list(c("VC", "OJ")),
  map_signif_level = TRUE
)

#what if the ToothGrowth data was a repeated measures
#drop the dose column, then spread len into one column per supplement (OJ, VC)
supp <- subset(ToothGrowth, select = -dose)
df <- unstack(supp, len ~ supp)

#work out the difference in the paired data
df$diff <- with(df, OJ - VC)

shapiro.test(df$diff)
#data is normally distributed so can use a paired t test

t.test(df$OJ, df$VC, paired = TRUE)


#bar chart of mean tooth length per supplement with +/- 1 standard error bars

#summarise each supplement: mean and standard error of the mean.
#The SE divides by the number of non-missing values so that it agrees
#with the na.rm = TRUE used for the mean and sd; n() counts every row
#in the group, including rows where len is NA
GP_summary <- GP %>%
  group_by(supp) %>%
  summarise(
    mean_len = mean(len, na.rm = TRUE),
    se_len = sd(len, na.rm = TRUE) / sqrt(sum(!is.na(len)))
  )

#one bar per supplement, error bars span mean - SE to mean + SE
ggplot(GP_summary, aes(x = supp, y = mean_len, fill = supp)) +
  geom_col() +
  geom_errorbar(
    aes(ymin = mean_len - se_len, ymax = mean_len + se_len),
    width = 0.2
  )

#We can treat the sleep data as a repeated measures design
#subset and unstack or use the method in last week's session

#drop the ID column, then spread extra into one column per group (X1, X2)
slsub <- subset(sleep, select = -ID)
df <- unstack(slsub, extra ~ group)

#calculate the difference (group 1 minus group 2, row by row)
diff <- with(df, X1 - X2)

shapiro.test(diff)

#no need for equal variance as this is a repeated measures design
#data is not normally distributed so use a Wilcoxon test
#as it is a repeated measures design we can test the differences
#against a median of 0, i.e. there is no difference

wilcox.test(x = diff, mu = 0)