# Continuous trait evolution
#---------------------------

using PhyloNetworks
using PhyloPlots
using RCall
using CSV
using DataFrames
using StatsModels

topologies = readMultiTopology("xiphophorus_networks_calibrated.tre");
plot(topologies[1], :R);
plot(topologies[2], :R);
net3 = topologies[3];
plot(net3, :R)
plot(net3, :R, useEdgeLength=true)
taxa_net = tipLabels(net3)

## load trait data, then delete rows for taxa not in the tree
##----------------

dat = CSV.read("xiphophorus_morphology_Cui_etal_2013.csv", copycols=true)
for i in reverse(1:nrow(dat))
    j = findfirst(isequal(dat[:tipNames][i]), taxa_net)
    if j==nothing # taxon not found in network
        println("taxon ", dat[:tipNames][i], " (row $i) not found in network")
        deleterows!(dat, i) # error
    end
end
# taxon Xnezahualcoyotl (row 19) not found in network

## sword index: ancestral state prediction
##----------------
dat_si = dat[:, [:sword_index, :tipNames]]
AS_si = ancestralStateReconstruction(dat_si, net3)
R"par(mar=c(.5,.5,.5,.5))";
plot(net3, :R, nodeLabel=expectationsPlot(AS_si), xlim=[0,25]);
plot(net3, :R, nodeLabel=predintPlot(AS_si), useEdgeLength=true, xlim=[-3,26]);
# compare ancestral state prediction interval to:
extrema(dat[:sword_index])

## female preference: ancestral state prediction
dat_fp = dat[:, [:preference, :tipNames]]
AS_fp = ancestralStateReconstruction(dat_fp, net3)
plot(net3, :R, nodeLabel=expectationsPlot(AS_fp), xlim=[0,25]);
plot(net3, :R, nodeLabel=predintPlot(AS_fp), useEdgeLength=true, xlim=[-3,26]);
extrema(skipmissing(dat[:preference]))

## phylogenetic signal
##----------------
lambda_si = phyloNetworklm(@formula(sword_index ~ 1), dat, net3, model="lambda")
lambda_fp = phyloNetworklm(@formula(preference ~ 1),  dat, net3, model="lambda")

## phylogenetic regression: does preference influence sword index?
##----------------
@rlibrary ggplot2
ggplot(dat, aes(x=:preference, y=:sword_index)) + geom_point(alpha=0.9) +
  xlab("female preference") + ylab("sword index");
fit_BM = phyloNetworklm(@formula(sword_index ~ preference), dat, net3)
fit_λ  = phyloNetworklm(@formula(sword_index ~ preference), dat, net3, model="lambda")

## transgressive evolution
##----------------

regNet = regressorHybrid(net3)
plot(net3, :R, showEdgeNumber=true, xlim=[0,18]);
dat3 = join(dat, regNet, on = :tipNames)
fit0 = phyloNetworklm(@formula(sword_index ~ 1), dat3, net3)
fit1 = phyloNetworklm(@formula(sword_index ~ sum), dat3, net3)
fit2 = phyloNetworklm(@formula(sword_index ~ shift_24 + shift_37 + shift_45),
                            dat3, net3)
ftest(fit2, fit1, fit0)

## discrete traits
##----------------
using StatsBase

# discretize sword index

dat[:sword_index] .> 0.3 # as if present means index > 0.3. true/false 
dat[:sword] = "absent"
dat[:sword][dat[:sword_index] .> 0.3] .= "present"
dat # to check

# plot the trait

hi = findall([!e.isMajor for e in net3.edge]) # "h"ybrid "i"ndices: 11, 36, 43
o = indexin(taxa_net, dat[:tipNames]) # o = order to take in data, to match taxon order in network
dat[:tipNames][o] ==  taxa_net        # true
o = [findfirst(dat[:tipNames], tax) for tax in taxa_net] # order
swordlab = dat[:sword][o];
swordpch = map(x -> (x=="present" ? 21 : 22), swordlab);
swordcol = map(x -> (x=="present" ? "red" : "yellow"), swordlab);
# R"pdf"("swordpresence.pdf", width=6, height=5);
R"par"(mar=[0,0,0,0]);
res = plot(net3, :R, useEdgeLength=true, xlim=[1.0,24], tipOffset=0.3);
# coordinates for minor hybrid edges:
hx1, hx2, hy1, hy2 = (res[i][hi] for i in 9:12);
# same as this longest way:
# hx1 = res[9][hi];  hx2 = res[10][hi]; hy1 = res[11][hi]; hy2 = res[12][hi]
R"lines"(x=[1,2], y=[1,1], lwd=2); # scale bar for edge lengths
R"text"(x=1.5, y=1.2, labels="1", adj=[0,0], cex=0.8);
R"arrows"(hx1, hy1, hx2, hy2, col="deepskyblue", length=0.08, angle=20);
R"text"(res[14][hi,:x].-0.002, res[14][hi,:y], res[14][hi,:gam], col="deepskyblue", cex=0.75);
R"points"(x=res[13][:x].+0.001, y=res[13][:y], pch=swordpch, bg=swordcol, cex=1.5);
R"legend"(x=1, y=21, legend=["present","absent"], pch=21:22, var"pt.bg"=["red","yellow"],
          bty="n",title="sword", var"title.adj"=0);
# R"dev.off()";

# estimate rates

model1 = BinaryTraitSubstitutionModel([0.1, 0.1], unique(dat[:sword]));
fit1 = fitDiscrete(net3, model1, dat[[:tipNames,:sword]])
aic(fit1) # 30.18292876865171

model2 = EqualRatesSubstitutionModel(2, 0.1, unique(dat[:sword]));
fit2 = fitDiscrete(net3, model2, dat[[:tipNames,:sword]])
aic(fit2) # 29.691362054507735

# ancestral state predictions
asr1 = ancestralStateReconstruction(fit1)
asr2 = ancestralStateReconstruction(fit2)

# plot ancestral states for fit2 (equal rates)

R"library(ape)"; # floating.pie.asp not visible from within R otherwise
colnames = names(asr2)[3:end] # column names, as symbols
colsword = ["red","yellow"]; # color for pollinators
asr2[:fake] = "";
netfit2 = fit2.net # for node numbers that correspond to those in asr2
# R"pdf"("swordpresence_ancestralstates.pdf", width=6, height=5);
R"par"(mar=[0,0,0,0]);
res = plot(netfit2, :R, nodeLabel = asr2[[:nodenumber, :fake]], ylim=[0.8,23.2],
           xlim=[1,20], tipOffset=0.3);
# coordinates for minor hybrid edges:
hx1, hx2, hy1, hy2 = (res[i][hi] for i in 9:12);
R"lines"(x=[1,2], y=[1,1], lwd=2); # scale bar for edge lengths
R"text"(x=1.5, y=1.2, labels="1", adj=[0,0], cex=0.8);
R"arrows"(hx1, hy1, hx2, hy2, col="deepskyblue", length=0.08, angle=20);
R"text"(res[14][hi,:x].-0.002, res[14][hi,:y], res[14][hi,:gam], col="deepskyblue", cex=0.75);
R"legend"(x=1, y=21, legend=["present","absent"], pch=21:22, var"pt.bg"=["red","yellow"],
          bty="n",title="sword", var"title.adj"=0);
for i in 1:nrow(asr2)
    ii = findfirst(isequal(string(asr2[:nodenumber][i])), res[13][:num]);
    if ii==nothing error("oops: i=$i, node $(asr2[:nodenumber][i]) not found"); end
    colpp = [asr2[i,col] for col in colnames]; # colors for posterior probability
    R"floating.pie.asp"(res[13][ii, :x], res[13][ii, :y], colpp,
       radius=0.16, col=colsword);
end
#R"dev.off()";

