# ============================================
# RESEARCH QUESTION 1: 
# Do male and female Village Weavers differ in wing length?
# Use the Excel sheet named "villageweaver" on Ledge
# (save it as villageweaver.csv in the working directory)
# ============================================

# Load packages
library(ggplot2)
library(dplyr)

# Step 1: Read the Village Weaver measurements and take a first look
weaver <- read.csv(file = "villageweaver.csv")

# Summary of wing length, then the number of birds recorded per sex
summary(weaver[["wing"]])
table(weaver[["sex"]])

# Step 2: Check assumptions (normality and homogeneity of variance)
# Q: Is wing length normally distributed within each sex?
# The t-test assumes normality within each group, so run the Shapiro-Wilk
# test separately for each sex; pooling both sexes can look non-normal
# (e.g. bimodal) purely because the group means differ.
lapply(split(weaver$wing, weaver$sex), shapiro.test)

# Q: Do the two sexes have similar variances?
# Levene's test; the grouping variable is passed as a factor so that car does
# not have to coerce it (with a warning) when sex was read in as text.
library(car)
leveneTest(weaver$wing, factor(weaver$sex))

# Step 3: Select statistical test
# If wing length is normal in both sexes: independent two-sample t-test.
# var.equal = TRUE gives Student's t-test, which relies on Levene's test in
# Step 2 being non-significant; use var.equal = FALSE (Welch) otherwise.
t.test(wing ~ sex, data = weaver, var.equal = TRUE)

# If not normal, use the Mann-Whitney U test (Wilcoxon rank-sum test in R)
wilcox.test(wing ~ sex, data = weaver)

# Descriptive statistics per sex to report alongside the test.
# na.rm = TRUE so that a missing wing measurement does not turn the summary
# into NA; the formula-based tests above omit missing values by default.
tapply(weaver$wing, weaver$sex, mean, na.rm = TRUE)
tapply(weaver$wing, weaver$sex, sd, na.rm = TRUE)

# Step 4: Boxplot of wing length for each sex
ggplot(data = weaver, mapping = aes(x = sex, y = wing, fill = sex)) +
  geom_boxplot() +
  xlab("Sex") +
  ylab("Wing length (mm)") +
  theme_classic()

# ============================================
# RESEARCH QUESTION 2 (Class exercise):
# Does bird species richness differ between woodland and grassland habitats?
# Use the excel sheet named "independent_t_test" on Ledge
# ============================================

# ============================================
# RESEARCH QUESTION 3:
# Does bird species richness differ before and after rainfall?
# Use the Excel sheet named "paired_t_test" on Ledge
# (save it as paired.csv in the working directory)
# ============================================

library(ggplot2)
library(dplyr)
library(tidyr)

# Step 1: Read the paired survey data and print it for inspection
paired <- read.csv(file = "paired.csv")
paired

# Step 2: Check assumptions (normality)
# A paired test works on the row-wise differences, so that is the quantity
# tested for normality: after rain minus before rain.
paired[["difference"]] <- with(paired, after_rain - before_rain)
paired
# Shapiro-Wilk test on the differences
shapiro.test(paired$difference)

# Step 3: Select statistical test
# Both tests compare after_rain - before_rain, i.e. the same direction as the
# `difference` column checked for normality in Step 2, so a positive estimate
# means richness was higher after rain. The two-sided p-values do not depend
# on the order of the two arguments.

# If normal, paired t-test (parametric)
t.test(paired$after_rain, paired$before_rain, paired = TRUE)

# If not normal, Wilcoxon signed-rank test (non-parametric)
wilcox.test(paired$after_rain, paired$before_rain, paired = TRUE)

# Descriptive statistics for each condition, to report alongside the test
mean(paired$before_rain)
sd(paired$before_rain)

mean(paired$after_rain)
sd(paired$after_rain)

# Step 4: Visualize the data

# Convert to long format for ggplot: the two richness columns are stacked into
# a `condition` label and a `richness` value.
# `condition` is made a factor with explicit levels so plots show
# "before_rain" first; as plain text it would be sorted alphabetically and
# "after_rain" would appear on the left.
paired_long <- paired %>%
  pivot_longer(cols = c(before_rain, after_rain),
               names_to = "condition",
               values_to = "richness") %>%
  mutate(condition = factor(condition,
                            levels = c("before_rain", "after_rain")))

# Boxplot of richness per condition; the legend is hidden because the x axis
# already labels the two conditions
ggplot(paired_long, aes(x = condition, y = richness, fill = condition)) +
  geom_boxplot() +
  labs(x = "Condition",
       y = "Bird richness") +
  theme_classic() +
  theme(legend.position = "none")

# Create a simple bar chart showing mean richness per condition
paired_long %>%
  group_by(condition) %>%
  summarise(mean_richness = mean(richness)) %>%
  ggplot(aes(x = condition, y = mean_richness, fill = condition)) +
  # geom_col() draws bar heights directly from the y values
  geom_col() +
  # Show the conditions in chronological rather than alphabetical order
  scale_x_discrete(limits = c("before_rain", "after_rain")) +
  labs(x = "Condition", y = "Mean number of species") +
  theme_classic() +
  # The x axis already names the conditions, so the fill legend is redundant
  theme(legend.position = "none")

# Data interpretation
# If the p-value < 0.05, we conclude that bird species richness differed
# significantly before and after rainfall across sites.
# If the p-value >= 0.05, there is no significant difference in richness
# before and after rain.