match metadata to phylo object to color tips by season

18 Views Asked by At

I'm looking to color my asv names from two trees (with the same asvs) to match asvnames_subset$season metadata. I have tried reordering the levels across each tree but the coloring seems random. I've tried asvnames_subset$sample_id and other variables (while also changing the rainbow() values to match) but I cannot find any pattern to the coloration.

# organize character vectors 
    new_subset <- newtree_subset$tip.label
    names_subset <- append(new_subset, oldtree_subset$tip.label)
    names_subset <- str_unique(names_subset)
    asvnames_subset <- asvnames[asvnames$asv %in% names_subset,]
    asvnames_subset <- asvnames_subset %>% arrange(factor(asv,levels=c(names_subset)))
    
#order and add colors
    samp <- factor(asvnames_subset$season)
    samp <- append(samp,rainbow(4))
    
#plot
    par(mfrow = c(1, 2))
    plot.phylo(newtree_subset,type="phylogram",use.edge.length = TRUE,
               show.tip.label = TRUE,tip.color=samp)
    plot.phylo(oldtree_subset,type="phylogram",use.edge.length = TRUE,
               show.tip.label = TRUE,tip.color = samp)

Colors are appearing and trees look correct, but the color orders are so off.

> dput(newtree_subset)
structure(list(edge = structure(c(18L, 19L, 20L, 20L, 21L, 22L, 
22L, 23L, 23L, 24L, 24L, 21L, 25L, 25L, 26L, 26L, 19L, 27L, 27L, 
28L, 28L, 29L, 29L, 30L, 31L, 32L, 32L, 31L, 33L, 33L, 30L, 18L, 
19L, 20L, 1L, 21L, 22L, 2L, 23L, 3L, 24L, 4L, 5L, 25L, 6L, 26L, 
7L, 8L, 27L, 9L, 28L, 10L, 29L, 11L, 30L, 31L, 32L, 12L, 13L, 
33L, 14L, 15L, 16L, 17L), dim = c(32L, 2L)), edge.length = c(0.007348363, 
0.016493094, 0.104453305, 0.010596877, 0.013533708, 0.043647895, 
0.040830049, 0.011827973, 0.007328203, 0.047965295, 0.047310929, 
0.038399086, 0.041560419, 0.012778779, 0.041790179, 5e-09, 0.008738777, 
0.032398892, 0.001858496, 0.011269214, 0.027206662, 0.033704088, 
5e-09, 0.024320975, 0.047228053, 0.009550482, 0.024708817, 0.006491692, 
0.030251663, 0.030603045, 0.033843658, 0.05445387), Nnode = 16L, 
    node.label = c("0.901", "0.786", "0.794", "0.753", "0.787", 
    "0.976", "0.358", "0.975", "0.724", "0.841", "0.688", "0.946", 
    "0.666", "0.938", "0.994", "0.380"), tip.label = c("a1bebf28de164805905291ce00b49d51", 
    "0138b4edc2e8818f9d594c158bbf4f20", "5cf2490c5286757fd316cff11b6977a9", 
    "22c41b072712a6d3a503a499d2601755", "6a3ea3980435bb709d2ef0dacfa1c752", 
    "2101f9bb6f5b9d9c8af9eaa3f711be03", "1a3dd6e7c593a37bf37fd6c6382efa53", 
    "98e3d05c0e8980b343dddd744adc4260", "0014a67d0e23b6ea5e33206b0f69ba18", 
    "5bee4123b4b3da7f09ecfabd30286fda", "9c002ac16e0d8660a7c9f836c1aa5e60", 
    "100acb5a73f74c73da6d9b4e43402a57", "97dc807436722059ee30761b239fbe6e", 
    "0d98bbd4292b6bb2eeef3dc5de8933e6", "d7b2ba0afc3bd176266fae72daed8326", 
    "bc1c63a0304b3ff8518f27ddd7871b4b", "6eecdd96b3bcbc550cf76daaf65ac44c"
    ), root.edge = 0.742350621), class = "phylo", order = "cladewise", group = structure(c(1L, 
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), levels = c("0", "1"), class = "factor"))

> dput(oldtree_subset)
structure(list(Nnode = 16L, tip.label = c("d7b2ba0afc3bd176266fae72daed8326", 
"bc1c63a0304b3ff8518f27ddd7871b4b", "100acb5a73f74c73da6d9b4e43402a57", 
"97dc807436722059ee30761b239fbe6e", "0d98bbd4292b6bb2eeef3dc5de8933e6", 
"5bee4123b4b3da7f09ecfabd30286fda", "9c002ac16e0d8660a7c9f836c1aa5e60", 
"0014a67d0e23b6ea5e33206b0f69ba18", "22c41b072712a6d3a503a499d2601755", 
"0138b4edc2e8818f9d594c158bbf4f20", "5cf2490c5286757fd316cff11b6977a9", 
"6a3ea3980435bb709d2ef0dacfa1c752", "b75ee709949995ec8c2b8d97349ec842", 
"a1bebf28de164805905291ce00b49d51", "30db1fb805b6581bab7e0ef5604663ba", 
"cfd611b09cee6f8ac6b139bbbcf59919", "e234c0d07b0652afe259ec8b7d551ecb"
), node.label = c("0.743", "0.863", "0.876", "0.932", "0.774", 
"0.843", "0.901", "0.788", "0.489", "0.887", "0.846", "0.947", 
"0.471", "0.692", "0.852", "0.427"), edge = structure(c(18L, 
19L, 20L, 20L, 21L, 22L, 22L, 21L, 19L, 18L, 23L, 24L, 25L, 26L, 
26L, 25L, 24L, 23L, 27L, 28L, 29L, 30L, 30L, 29L, 31L, 31L, 28L, 
32L, 33L, 33L, 32L, 27L, 19L, 20L, 17L, 21L, 22L, 16L, 15L, 14L, 
13L, 23L, 24L, 25L, 26L, 12L, 11L, 10L, 9L, 27L, 28L, 29L, 30L, 
8L, 7L, 31L, 6L, 5L, 32L, 33L, 4L, 3L, 2L, 1L), dim = c(32L, 
2L)), edge.length = c(0.008489367, 0.012378135, 0.099356344, 
0.018610025, 0.151006816, 0.020717346, 0.021791617, 0.169732458, 
0.096117467, 0.01142402, 0.031539499, 5e-09, 0.00822259, 0.054142434, 
0.04399721, 0.105824174, 0.059621329, 0.037877628, 0.026251384, 
0.016803509, 0.025350434, 0.025888111, 0.0540378, 0.006197142, 
0.03678638, 0.075242268, 0.035636402, 0.08815712, 0.042846825, 
0.005280567, 0.037514183, 0.059076984), root = 2043, root.edge = 0.840963913), class = "phylo", order = "cladewise", group = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), levels = c("0", "1"), class = "factor"))

> dput(asvnames_subset)
structure(list(sequence_id = c("NASQAN2015.148.348", "NASQAN2015.163.348", 
"NASQAN2015.203.394", "NASQAN2015.161.348", "NASQAN2015.161.348", 
"NASQAN2015.148.348", "NASQAN2015.163.348", "NASQAN2015.161.348", 
"NASQAN2015.163.348", "NASQAN2015.147.348", "NASQAN2015.162.348", 
"NASQAN2016.009.394", "NASQAN2015.147.348", "NASQAN2015.148.348", 
"NASQAN2015.148.348", "NASQAN2015.204.394", "NASQAN2016.009.394", 
"NASQAN2015.162.348", "NASQAN2016.013.394", "NASQAN2015.163.348", 
"NASQAN2016.005.394", "NASQAN2015.161.348", "NASQAN2015.204.394", 
"NASQAN2015.163.348", "NASQAN2015.148.348", "NASQAN2016.014.394", 
"NASQAN2015.147.348", "NASQAN2015.161.348", "NASQAN2015.163.348"
), asv = c("a1bebf28de164805905291ce00b49d51", "a1bebf28de164805905291ce00b49d51", 
"a1bebf28de164805905291ce00b49d51", "0138b4edc2e8818f9d594c158bbf4f20", 
"5cf2490c5286757fd316cff11b6977a9", "22c41b072712a6d3a503a499d2601755", 
"22c41b072712a6d3a503a499d2601755", "6a3ea3980435bb709d2ef0dacfa1c752", 
"2101f9bb6f5b9d9c8af9eaa3f711be03", "1a3dd6e7c593a37bf37fd6c6382efa53", 
"1a3dd6e7c593a37bf37fd6c6382efa53", "1a3dd6e7c593a37bf37fd6c6382efa53", 
"98e3d05c0e8980b343dddd744adc4260", "0014a67d0e23b6ea5e33206b0f69ba18", 
"5bee4123b4b3da7f09ecfabd30286fda", "9c002ac16e0d8660a7c9f836c1aa5e60", 
"100acb5a73f74c73da6d9b4e43402a57", "97dc807436722059ee30761b239fbe6e", 
"0d98bbd4292b6bb2eeef3dc5de8933e6", "d7b2ba0afc3bd176266fae72daed8326", 
"bc1c63a0304b3ff8518f27ddd7871b4b", "6eecdd96b3bcbc550cf76daaf65ac44c", 
"b75ee709949995ec8c2b8d97349ec842", "30db1fb805b6581bab7e0ef5604663ba", 
"cfd611b09cee6f8ac6b139bbbcf59919", "cfd611b09cee6f8ac6b139bbbcf59919", 
"e234c0d07b0652afe259ec8b7d551ecb", "e234c0d07b0652afe259ec8b7d551ecb", 
"e234c0d07b0652afe259ec8b7d551ecb"), sample_id = c("NASQAN2015.148", 
"NASQAN2015.163", "NASQAN2015.203", "NASQAN2015.161", "NASQAN2015.161", 
"NASQAN2015.148", "NASQAN2015.163", "NASQAN2015.161", "NASQAN2015.163", 
"NASQAN2015.147", "NASQAN2015.162", "NASQAN2016.009", "NASQAN2015.147", 
"NASQAN2015.148", "NASQAN2015.148", "NASQAN2015.204", "NASQAN2016.009", 
"NASQAN2015.162", "NASQAN2016.013", "NASQAN2015.163", "NASQAN2016.005", 
"NASQAN2015.161", "NASQAN2015.204", "NASQAN2015.163", "NASQAN2015.148", 
"NASQAN2016.014", "NASQAN2015.147", "NASQAN2015.161", "NASQAN2015.163"
), season = c("summer", "summer", "autumn", "spring", "spring", 
"summer", "summer", "spring", "summer", "spring", "spring", "spring", 
"spring", "summer", "summer", "winter", "spring", "spring", "summer", 
"summer", "winter", "spring", "winter", "summer", "summer", "summer", 
"spring", "spring", "summer")), class = "data.frame", row.names = c(NA, 
-29L))
0

There are 0 best solutions below