#==================================================================
# Order of checks
#1.	Check if data is in the right format for analysis
#2.	Save worksheet as CSV 
#3.	Set Working directory
#4.	Load any libraries (such as dplyr)
#5.	Read csv
#6.	Confirm data is correct and in the right format after load
#7.	A) Check for normality (Shapiro-Wilk test)
    #B) Check skewness and kurtosis
    #C) Check for equality of variances
    #D) Check for outliers
#8.	Decide if parametric or non-parametric test
#9.	Run test
#=====================================================================

# Load required packages. Replace `package` with the package name
# (for example dplyr). library() stops with an error when the package is
# not installed, whereas require() only warns and returns FALSE, which lets
# the script carry on and fail later with a less obvious message.
library(package)

# Show the data set so its contents can be checked by eye
print(data)

# Keep only rows where thingtofilter is at or below number.
# The result is not assigned, so it is only printed.
filter(data, thingtofilter <= number)

# Same filter, then add a column; the result is stored in data2
data2 <- mutate(
  filter(data, thingtofilter <= number),
  newcolumn = newcolumndata
)

# One row per value of column: total is the row count; percent_weight,
# despite its name, is computed as the plain mean of weight
summarise(
  group_by(data2, column),
  total = n(),
  percent_weight = mean(weight)
)

# Reshape to wide format: the values of variable1 become the new column
# names and the values of variable2 fill those columns

data_wide <- pivot_wider(
  data,
  names_from = variable1,
  values_from = variable2
)

# Flatten each column to a plain numeric vector (rather than a list or
# integer) so it can be passed straight to the tests further down
# NOTE(review): after pivot_wider() the new columns are named after the
# values found in variable1, so the two column names used below presumably
# need replacing with those names - confirm against the real data

Variable1 <- data_wide$variable1 %>%
  unlist() %>%
  as.numeric()
Variable2 <- data_wide$variable2 %>%
  unlist() %>%
  as.numeric()
    
# Normality checks on data$variable

# Histogram: visual check of the shape of the distribution
hist(data$variable)

# Shapiro-Wilk normality test
shapiro.test(data$variable)

# Skewness and kurtosis. Both results are stored but not printed here;
# print Skew_test and ku_test to see them.
# NOTE(review): skewness() and kurtosis() are not base R functions -
# presumably they come from a package such as moments or e1071 that must be
# loaded first; confirm which one is intended.
Skew_test <-skewness(data$variable)
ku_test<-kurtosis(data$variable)

# Variance: test whether two groups have equal variances

# If the data are normally distributed (parametric), use the F test.
# Variable1 and Variable2 are the two numeric vectors to compare.

var.test(Variable1, Variable2)

# If the data are not normally distributed (non-parametric), use Levene's
# test. First argument: the numeric values; second argument: the grouping
# column.

library(car)
leveneTest(data$variable1, data$variable2)

# A p-value greater than 0.05 means there is no significant difference
# between the variances

# Outliers: the values boxplot.stats() reports in its `out` component
outliers <- boxplot.stats(data$variable)[["out"]]
print(outliers)

#____________________________________________________________________
#INDEPENDENT SAMPLES
#-----------------------------------------------------------------------

    #PARAMETRIC TESTS
    # Two-sample t-test of variable between the groups given by group.
    # NOTE(review): no var.equal argument is supplied, so the function's
    # default applies - check the t.test documentation if equal variances
    # are being assumed.

t.test(data$variable ~ data$group)

    #NON PARAMETRIC TESTS WITH DUPLICATE (TIED) VALUES
    # Wilcoxon test of variable1 between the groups given by variable2.
    # exact=FALSE asks for an approximate p-value instead of an exact one.

two_sample_wilcox <-wilcox.test(data$variable1~data$variable2,exact=FALSE)
print(two_sample_wilcox)

    #NON PARAMETRIC TESTS WITH CONTINUOUS VALUES (NO TIES)
    # Same test without exact=FALSE. The result is stored under the same
    # name, so it replaces the one above if both sections are run.

two_sample_wilcox <-wilcox.test(data$variable1~data$variable2)
print(two_sample_wilcox)

#____________________________________________________________________________
#PAIRED SAMPLES
#----------------------------------------------------------------------------

#PARAMETRIC TESTS
# Paired t-test on the two numeric vectors Variable1 and Variable2.
# Values are paired by position, so both vectors must have the same length
# and the same ordering of subjects.

t.test(Variable1, Variable2, paired = TRUE)

#NON PARAMETRIC TESTS WITH DUPLICATE (TIED) VALUES
# Paired Wilcoxon test; exact = FALSE asks for an approximate p-value
# instead of an exact one.

wilcox.test(Variable1, Variable2, paired = TRUE, exact = FALSE)

#NON PARAMETRIC TESTS WITH CONTINUOUS VALUES (NO TIES)

wilcox.test(Variable1, Variable2, paired = TRUE)

#_____________________________________________________________________________
#Chi-squared
#-----------------------------------------------------------------------------

#Test of independence, e.g. counts of observations across categories
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Cross-tabulate two columns of data into a contingency table of counts
table_data <- table(data$Variable1, data$Variable2)

# Chi-squared test on that contingency table
chisq.test(table_data)

#Chi-squared goodness-of-fit test, e.g. observed versus expected
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++=

# Proportions expected in each category (must sum to 1)
expected <- c(0.4, 0.4, 0.2)

# Counts actually observed, in the same category order
observed <- c(50, 30, 20)

# Compare the observed counts with the expected proportions
chisq.test(observed, p = expected)


#Fisher's exact test
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Cross-tabulate two columns of data into a contingency table of counts.
# This reuses the name table_data, replacing any table stored under that
# name earlier in the script.
table_data <- table(data$Factor1, data$Factor2)

# Run Fisher's Exact Test on the contingency table
fisher.test(table_data)