# ==================================================================
# Template of snippets for choosing and running a statistical test.
#
# NOTE(review): names such as `data`, `package`, `variable`, `group`,
# `thingtofilter`, `number`, `column` and `newcolumndata` are not
# defined in this file and look like placeholders - replace them with
# real objects before running a snippet.
#
# Order of checks
# 1. Check if data is in the right format for analysis
# 2. Save worksheet as CSV
# 3. Set working directory
# 4. Load any libraries (such as dplyr)
# 5. Read csv
# 6. Confirm data is correct and in the right format after load
# 7. A) Check for normality (Shapiro)
#    B) Check skewness and kurtosis
#    C) Check for variance
#    D) Check for outliers
# 8. Decide if parametric or non-parametric test
# 9. Run test
# ==================================================================

# Code for loading a library (replace `package` with the package name).
# library() stops with an error when the package is missing, whereas
# require() only returns FALSE and lets the script carry on.
# The piped snippets below use %>%, filter(), mutate(), group_by(),
# summarise() and n() (dplyr) and pivot_wider() (tidyr).
library(package)

# Print data to check
print(data)

# Filter data
data %>%
  filter(thingtofilter <= number)

# Add column
data2 <- data %>%
  filter(thingtofilter <= number) %>%
  mutate(newcolumn = newcolumndata)

# Summarise: row count and mean weight per group
data2 %>%
  group_by(column) %>%
  summarise(total = n(), percent_weight = mean(weight))

# Make into 2 separate columns
data_wide <- data %>%
  pivot_wider(
    names_from = variable1,
    values_from = variable2
  )

# Changing into a numeric rather than list or integer
Variable1 <- as.numeric(unlist(data_wide$variable1))
Variable2 <- as.numeric(unlist(data_wide$variable2))

# Histogram
hist(data$variable)

# Shapiro
shapiro.test(data$variable)

# Skewness and kurtosis
# NOTE(review): skewness() and kurtosis() are not base R functions and
# no package providing them is loaded in this file - presumably they
# come from `moments` or `e1071`; load whichever is intended.
Skew_test <- skewness(data$variable)
ku_test <- kurtosis(data$variable)

# Variance
# If parametric use the following
var.test(Variable1, Variable2)

# If non-parametric use the following
# NOTE(review): presumably variable1 is the measurement and variable2
# the grouping variable, as in the wilcox.test() calls further down.
library(car)
leveneTest(data$variable1, data$variable2)
# A p-value greater than 0.05 means there is no significant difference
# between the variances.

# Identify outliers in a numeric vector
outliers <- boxplot.stats(data$variable)$out
print(outliers)

# ____________________________________________________________________
# INDEPENDENT SAMPLES
# --------------------------------------------------------------------
# PARAMETRIC TESTS
t.test(data$variable ~ data$group)

# NON-PARAMETRIC TESTS WITH DUPLICATE VALUES
# exact = FALSE asks for the normal approximation instead of an exact
# p-value.
two_sample_wilcox <- wilcox.test(
  data$variable1 ~ data$variable2,
  exact = FALSE
)
print(two_sample_wilcox)

# NON-PARAMETRIC TESTS WITH CONTINUOUS VALUES
two_sample_wilcox <- wilcox.test(data$variable1 ~ data$variable2)
print(two_sample_wilcox)

# ____________________________________________________________________
# PAIRED SAMPLES
# --------------------------------------------------------------------
# PARAMETRIC TESTS
t.test(Variable1, Variable2, paired = TRUE)

# NON-PARAMETRIC TESTS WITH DUPLICATE VALUES
wilcox.test(Variable1, Variable2, paired = TRUE, exact = FALSE)

# NON-PARAMETRIC TESTS WITH CONTINUOUS VALUES
wilcox.test(Variable1, Variable2, paired = TRUE)

# ____________________________________________________________________
# Chi-squared
# --------------------------------------------------------------------
# Test of independence, e.g. counts of observations across categories
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Example contingency table
table_data <- table(data$Variable1, data$Variable2)

# Chi-squared test
chisq.test(table_data)

# Chi-squared goodness-of-fit test, e.g. expected versus observed
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Observed counts
observed <- c(50, 30, 20)

# Expected proportions (must sum to 1)
expected <- c(0.4, 0.4, 0.2)

# Chi-squared test
chisq.test(x = observed, p = expected)

# Fisher's test
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Create a contingency table
table_data <- table(data$Factor1, data$Factor2)

# Run Fisher's Exact Test
fisher.test(table_data)