# General Linear Models and multiple comparisons # Use the penguins data # RESEARCH QUESTION 1: Does body mass differ between species? # Does body mass differ between species? # Does body mass differ between species and island? # Does body mass differ between species, island and sex? # Does body mass differ between species, island, sex and year? # Why use General Linear Model?! penguins head(penguins) # Does body mass differ between species? # Assume body mass is normally-distributed model1 <- lm(body_mass ~ species, data = penguins) summary(model1) # Does body mass differ between species and island? model2 <- lm(body_mass ~ species+island, data = penguins) summary(model2) # Does body mass differ between species, island and sex? model3 <- lm(body_mass ~ species+island+sex, data = penguins) summary(model3) # Does body mass differ between species, island, sex and year? model4 <- lm(body_mass ~ species+island+sex+year, data = penguins) summary(model4) model5 <- lm(body_mass ~ species+sex, data = penguins) summary(model5) model6 <- lm(body_mass ~ island, data = penguins) summary(model6) AIC(model1,model2,model3,model4,model5,model6) # Is the effect of species statistically significant? # Which species has the highest mean body mass? # What does the p-value tells you? # What does the Ajusted R-squared value tells you? # Multiple Comparisons library(emmeans) emmeans(model5, pairwise ~ species) # Plot Using ggplot library(ggplot2) ggplot(penguins, aes(x = species, y = body_mass, fill = species)) + geom_boxplot() + labs(x = "Species", y = "Body mass (g)") + theme_classic() # Based on species and sex ggplot(penguins, aes(x = species, y = body_mass, fill = sex)) + geom_boxplot() + labs(x = "Species", y = "Body mass (g)") + theme_classic() # Remove rows where sex is NA library(dplyr) penguins_clean <- penguins %>% filter(!is.na(sex)) ggplot(penguins_clean, aes(x = species, y = body_mass, fill = sex)) + geom_boxplot() + labs(x = "Species", y = "Body mass (g)") + theme_classic() # Does the interaction between species and sex influence body mass? model7 <- lm(body_mass ~ species * sex, data = penguins_clean) summary(model7) emmeans(model7, pairwise ~ sex | species) emmeans(model7, "sex") emmeans(model7, "species") emmeans(model7, ~ species | sex) model7 <- lm(body_mass ~ species * sex, data = penguins_clean) summary(model7) # Does flipper length vary across species,sex, island and year?