convert biogeobears output for plotting revgadgets (original) (raw)

bgb_to_revgadgets <- function(results_path, geo_data_path, tree_path, area_names = NULL) {

load biogeobears results object

load(results_path)

change data directories in results object

res[["inputs"]]$geogfn <- geo_data_path

res[["inputs"]]$trfn <- tree_path

##### Process data for plotting #####

read in tree separately

tree <- RevGadgets::readTrees(paths = res[["inputs"]]$trfn)

states <- res$inputs$all_geog_states_list_usually_inferred_from_areas_maxareas

create a dataframe with results in revgadgets compliant format

rev_data <- data.frame(matrix(nrow = nrow(res$relative_probs_of_each_state_at_branch_bottom_below_node_UPPASS),

ncol = 15))

colnames(rev_data) <- c("end_state_1", "end_state_2", "end_state_3",

"end_state_1_pp", "end_state_2_pp", "end_state_3_pp",

"end_state_other_pp",

"start_state_1", "start_state_2", "start_state_3",

"start_state_1_pp", "start_state_2_pp", "start_state_3_pp",

"start_state_other_pp",

"node")

get end states

for (i in 1:nrow(res$ML_marginal_prob_each_state_at_branch_top_AT_node)) {

row <- res$ML_marginal_prob_each_state_at_branch_top_AT_node[i,]

rev_data[i, 1] <- order(row,decreasing=T)[1]

rev_data[i, 2] <- order(row,decreasing=T)[2]

rev_data[i, 3] <- order(row,decreasing=T)[3]

rev_data[i, 4] <- row[order(row,decreasing=T)[1]]

rev_data[i, 5] <- row[order(row,decreasing=T)[2]]

rev_data[i, 6] <- row[order(row,decreasing=T)[3]]

rev_data[i, 7] <- sum(row[order(row,decreasing=T)[4:length(row)]])

}

get start states

for (i in 1:nrow(res$ML_marginal_prob_each_state_at_branch_bottom_below_node)) {

row <- res$ML_marginal_prob_each_state_at_branch_bottom_below_node[i,]

rev_data[i, 8] <- order(row,decreasing=T)[1]

rev_data[i, 9] <- order(row,decreasing=T)[2]

rev_data[i, 10] <- order(row,decreasing=T)[3]

rev_data[i, 11] <- row[order(row,decreasing=T)[1]]

rev_data[i, 12] <- row[order(row,decreasing=T)[2]]

rev_data[i, 13] <- row[order(row,decreasing=T)[3]]

rev_data[i, 14] <- sum(row[order(row,decreasing=T)[4:length(row)]])

}

rev_data$node <- 1:nrow(res$ML_marginal_prob_each_state_at_branch_bottom_below_node)

make better labels

tipranges <- getranges_from_LagrangePHYLIP(res[["inputs"]]$geogfn)

geo <- res$inputs$all_geog_states_list_usually_inferred_from_areas_maxareas

geo_num <- unlist(lapply(lapply(geo, as.character), paste0, collapse ="_"))

if (is.null(area_names)) {

area_names <- colnames(tipranges@df)

}

if (length(area_names) != length(colnames(tipranges@df))) stop("Number of specified area names is incorrect. Check your geo data file.")

number_codes <- 0:(length(area_names)-1)

geo_letters <- geo_num

recodes <- paste0(paste0(number_codes, " = '", area_names, "'"), collapse = "; ")

for (i in 1:length(geo_letters)) {

code <- geo_letters[i]

code_split <- unlist(strsplit(code, "_"))

geo_letters[i] <- paste0(car::recode(code_split, recodes), collapse = "")

}

#area_names <- rev(area_names)

label_dict <- data.frame(lab_num_short = 1:length(geo),

lab_num_long = geo_num,

lab_letters = geo_letters)

replace short number codes with letters

not_state_cols <- c(grep("_pp", colnames(rev_data)),

grep("node", colnames(rev_data)))

state_cols <- c(1:ncol(rev_data))[!c(1:ncol(rev_data)) %in% not_state_cols]

for (i in state_cols) { # loop through by column indices for the state columns

col <- as.character(rev_data[,i])

for (j in 1:length(col)) { # loop through each item in the column and replace with letters

col[j] <- label_dict$lab_letters[which(label_dict$lab_num_short == col[j])]

}

rev_data[,i] <- col

}

#change "NA" to NA

rev_data %>%

naniar::replace_with_na_all(condition = ~.x == "NA") -> rev_data

change any NAs in PP columns to 0

pp_cols <- grep("_pp", colnames(rev_data))

for (p in pp_cols) { rev_data[ ,p][is.na(rev_data[ ,p])] <- 0 }

make treedata object (combine data and tree)

tibble::as_tibble(tree[[1]][[1]]) %>%

full_join(rev_data, by = 'node') %>%

as.treedata() -> rev_treedata

#add list of states

attributes(rev_treedata)$state_labels <- as.character(na.omit(unique(unlist(rev_data[,c(1:3, 8:10)]))))

return(rev_treedata)

}