## ---------------------------------------------------------------------------------------------------------------- ##
## Script name: Geometric_morphometrics
## Codes for the paper: Lack of assortative mating in a modified landscape leads to a hybrid swarm
## Script author: Maurine Vilcot (maurine.vilcot@ens-lyon.fr)
## ---------------------------------------------------------------------------------------------------------------- ##

#### -------- Load data ---------------------------------------------------------------------------------------------
library(vegan) # bstick
library(Morpho) # getMeaningfulPCs
library(mclust) # mclustBIC
library(dplyr) # arrange
library(ggplot2)
library(gridExtra) # grid.arrange
library(fBasics) # dagoTest

# Procruste, PCA, and ANOVA from pronotum landmarks were made on MORPHOJ for the paper
# PCA eigenvalues and scores were stored in following tables
PCeigenvalues <- read.csv("Data/PC_eigenvalues.csv", header = TRUE, dec = ",", sep = ";", row.names=1)
PCscores <- read.csv("Data/PC_scores.txt", header = TRUE, sep = "\t", row.names=1)



#### -------- Broken stick ---------------------------------------------------------------------------------------------
# Broken-stick test on MORPHOJ eigenvalues to identify statistically significant principal components (PCs)
bks <- as.data.frame(bstick(20, tot.var=sum(PCeigenvalues$Eigenvalues))) # bks is the broken stick model you need to manually edit the bstick (it is 50 now) to the number of eigenvalues your file has
components <- seq(from=1, to=nrow(PCeigenvalues), by=1)
PCeigenvalues$comp <- components
components2 <- seq(from=1, to=nrow(bks), by=1)
bks$comp <- components2
plot(PCeigenvalues$comp, PCeigenvalues$Eigenvalues, type="h",xlab="Components", ylab="Eigenvalue")
points(PCeigenvalues$comp, PCeigenvalues$Eigenvalues, type="s",xlab="Components", ylab="Eigenvalue")
lines(bks$comp, bks[,1], col="red")



#### -------- Mclust on PC1 PC2 values ---------------------------------------------------------------------------------------------
ClusteringPronotum <- PCscores[,c("PC1", "PC2")]
BICpronotum <- mclustBIC(ClusteringPronotum)
summary(BICpronotum) # Best model is 4 clusters
MCpronotum <- Mclust(ClusteringPronotum, x=BICpronotum)
summary(MCpronotum, parameters = TRUE)
plot(MCpronotum, what = "uncertainty")
ClusteringPronotum$cluster_pronotumPC2 <- MCpronotum$classification
ClusteringPronotum$uncertainty_pronotumPC2 <- MCpronotum$uncertainty

## According to traditional morphometrics, we can identify the 4 clusters as the 2 species and the 2 sexes
ClusteringPronotum$Species_geometric [ClusteringPronotum$cluster_pronotumPC2 == 1 | ClusteringPronotum$cluster_pronotumPC2 == 4] <- "marginale"
ClusteringPronotum$Species_geometric [ClusteringPronotum$cluster_pronotumPC2 == 2 | ClusteringPronotum$cluster_pronotumPC2 == 3] <- "otagoense"
ClusteringPronotum$Sex_geometric [ClusteringPronotum$cluster_pronotumPC2 == 1 | ClusteringPronotum$cluster_pronotumPC2 == 3] <- "Female"
ClusteringPronotum$Sex_geometric [ClusteringPronotum$cluster_pronotumPC2 == 2 | ClusteringPronotum$cluster_pronotumPC2 == 4] <- "Male"

## Identify high uncertain specimens
ClusteringPronotum$Species_geometric_unc [ClusteringPronotum$cluster_pronotumPC2 == 1 | ClusteringPronotum$cluster_pronotumPC2 == 4] <- "marginale"
ClusteringPronotum$Species_geometric_unc [ClusteringPronotum$cluster_pronotumPC2 == 2 | ClusteringPronotum$cluster_pronotumPC2 == 3] <- "otagoense"
ClusteringPronotum$Species_geometric_unc [ClusteringPronotum$uncertainty_pronotumPC2 >= 0.2 ] <- "uncertain"

## save on computer
ClusteringTraditional <- read.csv2("Output/Clustering_traditional_individuals.csv", row.names = 1)
Clustering <- merge(ClusteringTraditional, ClusteringPronotum, by="row.names", all=TRUE)
rownames(Clustering) <- Clustering[, 1] ## set rownames
Clustering <- Clustering[, -1]



#### --------  stacked histograms of species assignment ---------------------------------------------------------------------------------------------
proba <- merge(MCpronotum$z, Clustering[,c("PC1", "PC2", "Sex", "Lname", "LocationName", "Species_traditional","Species_geometric", "cluster_pronotumPC2","Sex_geometric","Latitude")], by="row.names")
rownames(proba) <- proba[, 1] ## set rownames
colnames(proba)[colnames(proba)=="Row.names"] <- "Code"

proba$marginale_membership <- proba$V1 + proba$V4
proba$otagoense_membership <- proba$V2 + proba$V3
proba$species[proba$marginale_membership>=0.8] <- "marginale"
proba$species[proba$otagoense_membership>=0.8] <- "otagoense"
proba$species[proba$marginale_membership<0.8 & proba$marginale_membership>0.2] <- "uncertain"
proba$membership_certain[proba$species == "uncertain"] <- "No"
proba$membership_certain[proba$species != "uncertain"] <- "Yes"

table(proba$species)
proba <- arrange(proba, Latitude)
row.names(proba) <- paste(proba$Code, " (", proba$Lname, ")", sep="")
write.csv2(proba, "Output/Proba_cluster_pronotum.csv")

barplot(as.matrix(t(proba[,c("marginale_membership", "otagoense_membership")])), col=c("red", "blue"),  #OR proba[,c("V4", "V1", "V2", "V3")])), col=c("red", "red", "blue", "blue") 
        space = 0, border=NA, las=2, cex.names=0.08, cex.axis=0.5, angle=45, ylab = "Probability")



#### -------- PC1 PC2 plot with clustering ---------------------------------------------------------------------------------------------
# PC1 PC2 representation of pronotum analysis with pronotum clustering
ggplot(proba, aes(x=PC1, y=PC2)) +
  geom_point(aes(alpha=membership_certain, shape=Sex_geometric, col = Species_geometric)) +
  scale_shape_manual("Sex", values = c(17,15)) +
  scale_color_manual("Species", values = c("marginale"="red", "otagoense"="blue")) +
  scale_alpha_manual("Certain", values = c(0.4, 1)) +
  theme_minimal()



#### -------- Density plot ---------------------------------------------------------------------------------------------
# Vizualize phenotypic variation
# Plot only locations with more than 20 individuals
tt <- table(proba$Lname)
probaplot <- subset(proba, Lname %in% names(tt[tt > 20]))
probaplot <- arrange(probaplot, Latitude)

#density plot 
plot_density <- ggplot(probaplot, aes(PC1)) + 
  geom_density(position="stack", colour="grey") +
  scale_fill_manual("Species", values = c("marginale"="red", "otagoense"="blue", "uncertain"="darkmagenta")) +
  facet_wrap(~Lname, scales = 'fixed', ncol=1) +
  xlim(range(-0.18,0.18)) +
  geom_vline(xintercept=0, colour="grey") +
  theme(axis.text.x  = element_text(vjust=0.5, size=5)) +
  theme_classic() +  theme(legend.position="none")

#stack histogram by species
plot_histogram <- ggplot(probaplot, aes(PC1)) + 
  geom_histogram(position = "stack", alpha = 0.9, bins=30, aes(fill=species)) + 
  facet_wrap(~Lname, scales = 'fixed', ncol=1) +
  xlim(range(-0.18,0.18)) +
  geom_vline(xintercept=0, colour="grey", line) +
  scale_fill_manual("Species", values = c("marginale"="red", "otagoense"="blue", "uncertain"="green")) +
  theme_classic() +
  theme(axis.text.x  = element_text(vjust=0.5, size=5)) +
  theme(legend.position="none")

grid.arrange(plot_density, plot_histogram, ncol=2)



#### -------- Normality Test (PC1) ---------------------------------------------------------------------------------------------
norm_test <- data.frame()
for (location in c("DB", "DQ", "HR", "AW", "AG", "AL", "AM", "TE", "TW")){
  LOC <- subset(Clustering, Lname==location)
  print(location)
  norm_dago <- dagoTest(LOC$PC1) #Agostino (more powerful and works also for ex-aequo)
  norm_test[location, "d'Agostino Omnibus"] <- norm_dago@test$p.value[1]
}
write.csv(norm_test, "Output/Normality_test_pvalue.csv")



#### -------- PC1 mean & sd by location type ---------------------------------------------------------------------------------------------
DB <- subset(Clustering, Lname=="DB")
DQ <- subset(Clustering, Lname=="DQ")
HR <- subset(Clustering, Lname=="HR")
AW <- subset(Clustering, Lname=="AW")
AG <- subset(Clustering, Lname=="AG")
AL <- subset(Clustering, Lname=="AL")
AM <- subset(Clustering, Lname=="AM")
TE <- subset(Clustering, Lname=="TE")
TW <- subset(Clustering, Lname=="TW")

L_otagoense <- rbind(DB, DQ, HR)
L_marginale <- AW
L_mixed <- rbind(TE, TW, AM, AL, AG)
mean(L_otagoense$PC1)
mean(L_marginale$PC1)
mean(L_mixed$PC1)


