# General Linear Models and multiple comparisons ----
# Use the penguins data

# Packages are loaded up front so a missing dependency fails immediately
# instead of halfway through the analysis.
library(dplyr)
library(emmeans)
library(ggplot2)

# RESEARCH QUESTION 1: Does body mass differ between species? ----
# Why use a General Linear Model?!

# NOTE(review): the column name `body_mass` matches the `penguins` data set
# shipped in the `datasets` package of recent R releases; the palmerpenguins
# package names the same column `body_mass_g` -- confirm which one is in use.

# Print the data to inspect it before modelling.
penguins

# One-way model: body mass explained by species.
model1 <- lm(body_mass ~ species, data = penguins)
summary(model1)

# Is the effect of species statistically significant?
# Which species has the highest mean body mass?
# What does the p-value tell you?
# What does the Adjusted R-squared value tell you?

# Multiple Comparisons ----
# Estimated marginal means per species plus all pairwise contrasts.
emmeans(model1, pairwise ~ species)

# Plot Using ggplot ----
ggplot(penguins, aes(x = species, y = body_mass, fill = species)) +
  geom_boxplot() +
  labs(x = "Species", y = "Body mass (g)") +
  theme_classic()

# Based on species and sex
# (penguins with a missing sex appear as their own NA group here)
ggplot(penguins, aes(x = species, y = body_mass, fill = sex)) +
  geom_boxplot() +
  labs(x = "Species", y = "Body mass (g)") +
  theme_classic()

# Remove rows where sex is NA
penguins_clean <- penguins %>%
  filter(!is.na(sex))

ggplot(penguins_clean, aes(x = species, y = body_mass, fill = sex)) +
  geom_boxplot() +
  labs(x = "Species", y = "Body mass (g)") +
  theme_classic()

# Does the interaction between species and sex influence body mass? ----
# `species * sex` expands to both main effects plus their interaction.
model2 <- lm(body_mass ~ species * sex, data = penguins_clean)
summary(model2)

# Compare the sexes separately within each species.
emmeans(model2, pairwise ~ sex | species)

# Exercises ----
# Does flipper length vary across islands?
# Does flipper length vary across species and island?