match metadata to phylo object to color tips by season

Question

match metadata to phylo object to color tips by season

18 Views Asked by Geomicro At 14 February 2024 at 23:09

I'm looking to color my asv names from two trees (with the same asvs) to match asvnames_subset$season metadata. I have tried reordering the levels across each tree but the coloring seems random. I've tried asvnames_subset$sample_id and other variables (while also changing the rainbow() values to match) but I cannot find any pattern to the coloration.

# organize character vectors 
    new_subset <- newtree_subset$tip.label
    names_subset <- append(new_subset, oldtree_subset$tip.label)
    names_subset <- str_unique(names_subset)
    asvnames_subset <- asvnames[asvnames$asv %in% names_subset,]
    asvnames_subset <- asvnames_subset %>% arrange(factor(asv,levels=c(names_subset)))
    
#order and add colors
    samp <- factor(asvnames_subset$season)
    samp <- append(samp,rainbow(4))
    
#plot
    par(mfrow = c(1, 2))
    plot.phylo(newtree_subset,type="phylogram",use.edge.length = TRUE,
               show.tip.label = TRUE,tip.color=samp)
    plot.phylo(oldtree_subset,type="phylogram",use.edge.length = TRUE,
               show.tip.label = TRUE,tip.color = samp)

Colors are appearing and trees look correct, but the color orders are so off.

> dput(newtree_subset)
structure(list(edge = structure(c(18L, 19L, 20L, 20L, 21L, 22L, 
22L, 23L, 23L, 24L, 24L, 21L, 25L, 25L, 26L, 26L, 19L, 27L, 27L, 
28L, 28L, 29L, 29L, 30L, 31L, 32L, 32L, 31L, 33L, 33L, 30L, 18L, 
19L, 20L, 1L, 21L, 22L, 2L, 23L, 3L, 24L, 4L, 5L, 25L, 6L, 26L, 
7L, 8L, 27L, 9L, 28L, 10L, 29L, 11L, 30L, 31L, 32L, 12L, 13L, 
33L, 14L, 15L, 16L, 17L), dim = c(32L, 2L)), edge.length = c(0.007348363, 
0.016493094, 0.104453305, 0.010596877, 0.013533708, 0.043647895, 
0.040830049, 0.011827973, 0.007328203, 0.047965295, 0.047310929, 
0.038399086, 0.041560419, 0.012778779, 0.041790179, 5e-09, 0.008738777, 
0.032398892, 0.001858496, 0.011269214, 0.027206662, 0.033704088, 
5e-09, 0.024320975, 0.047228053, 0.009550482, 0.024708817, 0.006491692, 
0.030251663, 0.030603045, 0.033843658, 0.05445387), Nnode = 16L, 
    node.label = c("0.901", "0.786", "0.794", "0.753", "0.787", 
    "0.976", "0.358", "0.975", "0.724", "0.841", "0.688", "0.946", 
    "0.666", "0.938", "0.994", "0.380"), tip.label = c("a1bebf28de164805905291ce00b49d51", 
    "0138b4edc2e8818f9d594c158bbf4f20", "5cf2490c5286757fd316cff11b6977a9", 
    "22c41b072712a6d3a503a499d2601755", "6a3ea3980435bb709d2ef0dacfa1c752", 
    "2101f9bb6f5b9d9c8af9eaa3f711be03", "1a3dd6e7c593a37bf37fd6c6382efa53", 
    "98e3d05c0e8980b343dddd744adc4260", "0014a67d0e23b6ea5e33206b0f69ba18", 
    "5bee4123b4b3da7f09ecfabd30286fda", "9c002ac16e0d8660a7c9f836c1aa5e60", 
    "100acb5a73f74c73da6d9b4e43402a57", "97dc807436722059ee30761b239fbe6e", 
    "0d98bbd4292b6bb2eeef3dc5de8933e6", "d7b2ba0afc3bd176266fae72daed8326", 
    "bc1c63a0304b3ff8518f27ddd7871b4b", "6eecdd96b3bcbc550cf76daaf65ac44c"
    ), root.edge = 0.742350621), class = "phylo", order = "cladewise", group = structure(c(1L, 
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), levels = c("0", "1"), class = "factor"))

> dput(oldtree_subset)
structure(list(Nnode = 16L, tip.label = c("d7b2ba0afc3bd176266fae72daed8326", 
"bc1c63a0304b3ff8518f27ddd7871b4b", "100acb5a73f74c73da6d9b4e43402a57", 
"97dc807436722059ee30761b239fbe6e", "0d98bbd4292b6bb2eeef3dc5de8933e6", 
"5bee4123b4b3da7f09ecfabd30286fda", "9c002ac16e0d8660a7c9f836c1aa5e60", 
"0014a67d0e23b6ea5e33206b0f69ba18", "22c41b072712a6d3a503a499d2601755", 
"0138b4edc2e8818f9d594c158bbf4f20", "5cf2490c5286757fd316cff11b6977a9", 
"6a3ea3980435bb709d2ef0dacfa1c752", "b75ee709949995ec8c2b8d97349ec842", 
"a1bebf28de164805905291ce00b49d51", "30db1fb805b6581bab7e0ef5604663ba", 
"cfd611b09cee6f8ac6b139bbbcf59919", "e234c0d07b0652afe259ec8b7d551ecb"
), node.label = c("0.743", "0.863", "0.876", "0.932", "0.774", 
"0.843", "0.901", "0.788", "0.489", "0.887", "0.846", "0.947", 
"0.471", "0.692", "0.852", "0.427"), edge = structure(c(18L, 
19L, 20L, 20L, 21L, 22L, 22L, 21L, 19L, 18L, 23L, 24L, 25L, 26L, 
26L, 25L, 24L, 23L, 27L, 28L, 29L, 30L, 30L, 29L, 31L, 31L, 28L, 
32L, 33L, 33L, 32L, 27L, 19L, 20L, 17L, 21L, 22L, 16L, 15L, 14L, 
13L, 23L, 24L, 25L, 26L, 12L, 11L, 10L, 9L, 27L, 28L, 29L, 30L, 
8L, 7L, 31L, 6L, 5L, 32L, 33L, 4L, 3L, 2L, 1L), dim = c(32L, 
2L)), edge.length = c(0.008489367, 0.012378135, 0.099356344, 
0.018610025, 0.151006816, 0.020717346, 0.021791617, 0.169732458, 
0.096117467, 0.01142402, 0.031539499, 5e-09, 0.00822259, 0.054142434, 
0.04399721, 0.105824174, 0.059621329, 0.037877628, 0.026251384, 
0.016803509, 0.025350434, 0.025888111, 0.0540378, 0.006197142, 
0.03678638, 0.075242268, 0.035636402, 0.08815712, 0.042846825, 
0.005280567, 0.037514183, 0.059076984), root = 2043, root.edge = 0.840963913), class = "phylo", order = "cladewise", group = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), levels = c("0", "1"), class = "factor"))

> dput(asvnames_subset)
structure(list(sequence_id = c("NASQAN2015.148.348", "NASQAN2015.163.348", 
"NASQAN2015.203.394", "NASQAN2015.161.348", "NASQAN2015.161.348", 
"NASQAN2015.148.348", "NASQAN2015.163.348", "NASQAN2015.161.348", 
"NASQAN2015.163.348", "NASQAN2015.147.348", "NASQAN2015.162.348", 
"NASQAN2016.009.394", "NASQAN2015.147.348", "NASQAN2015.148.348", 
"NASQAN2015.148.348", "NASQAN2015.204.394", "NASQAN2016.009.394", 
"NASQAN2015.162.348", "NASQAN2016.013.394", "NASQAN2015.163.348", 
"NASQAN2016.005.394", "NASQAN2015.161.348", "NASQAN2015.204.394", 
"NASQAN2015.163.348", "NASQAN2015.148.348", "NASQAN2016.014.394", 
"NASQAN2015.147.348", "NASQAN2015.161.348", "NASQAN2015.163.348"
), asv = c("a1bebf28de164805905291ce00b49d51", "a1bebf28de164805905291ce00b49d51", 
"a1bebf28de164805905291ce00b49d51", "0138b4edc2e8818f9d594c158bbf4f20", 
"5cf2490c5286757fd316cff11b6977a9", "22c41b072712a6d3a503a499d2601755", 
"22c41b072712a6d3a503a499d2601755", "6a3ea3980435bb709d2ef0dacfa1c752", 
"2101f9bb6f5b9d9c8af9eaa3f711be03", "1a3dd6e7c593a37bf37fd6c6382efa53", 
"1a3dd6e7c593a37bf37fd6c6382efa53", "1a3dd6e7c593a37bf37fd6c6382efa53", 
"98e3d05c0e8980b343dddd744adc4260", "0014a67d0e23b6ea5e33206b0f69ba18", 
"5bee4123b4b3da7f09ecfabd30286fda", "9c002ac16e0d8660a7c9f836c1aa5e60", 
"100acb5a73f74c73da6d9b4e43402a57", "97dc807436722059ee30761b239fbe6e", 
"0d98bbd4292b6bb2eeef3dc5de8933e6", "d7b2ba0afc3bd176266fae72daed8326", 
"bc1c63a0304b3ff8518f27ddd7871b4b", "6eecdd96b3bcbc550cf76daaf65ac44c", 
"b75ee709949995ec8c2b8d97349ec842", "30db1fb805b6581bab7e0ef5604663ba", 
"cfd611b09cee6f8ac6b139bbbcf59919", "cfd611b09cee6f8ac6b139bbbcf59919", 
"e234c0d07b0652afe259ec8b7d551ecb", "e234c0d07b0652afe259ec8b7d551ecb", 
"e234c0d07b0652afe259ec8b7d551ecb"), sample_id = c("NASQAN2015.148", 
"NASQAN2015.163", "NASQAN2015.203", "NASQAN2015.161", "NASQAN2015.161", 
"NASQAN2015.148", "NASQAN2015.163", "NASQAN2015.161", "NASQAN2015.163", 
"NASQAN2015.147", "NASQAN2015.162", "NASQAN2016.009", "NASQAN2015.147", 
"NASQAN2015.148", "NASQAN2015.148", "NASQAN2015.204", "NASQAN2016.009", 
"NASQAN2015.162", "NASQAN2016.013", "NASQAN2015.163", "NASQAN2016.005", 
"NASQAN2015.161", "NASQAN2015.204", "NASQAN2015.163", "NASQAN2015.148", 
"NASQAN2016.014", "NASQAN2015.147", "NASQAN2015.161", "NASQAN2015.163"
), season = c("summer", "summer", "autumn", "spring", "spring", 
"summer", "summer", "spring", "summer", "spring", "spring", "spring", 
"spring", "summer", "summer", "winter", "spring", "spring", "summer", 
"summer", "winter", "spring", "winter", "summer", "summer", "summer", 
"spring", "spring", "summer")), class = "data.frame", row.names = c(NA, 
-29L))

Original Q&A

match metadata to phylo object to color tips by season

There are 0 best solutions below

Related Questions in R

Related Questions in PLOT

Related Questions in PHYLOGENY

Trending Questions

Popular # Hahtags

Popular Questions