I have the taxonomic IDs of a set of species, and I can get the species and genus names from NCBI (https://www.ncbi.nlm.nih.gov/Taxonomy/TaxIdentifier/tax_identifier.cgi). However, I also want the phylum, class, order, and so on for all of these entries.

I have tried the taxize package, but it does not work for a large dataset.

# Genus-level (or higher) names to look up, one entry per sample; names are
# deliberately repeated because several samples share the same taxon.
specieslist <- c(
  "Clostridium", "Clostridium", "Achromobacter", "Achromobacter",
  "Acinetobacter", "Acinetobacter", "Acinetobacter", "Acinetobacter",
  "Acinetobacter", "Acinetobacter", "Acinetobacter", "Acinetobacter",
  "Acinetobacter", "Acinetobacter", "Acinetobacter", "Acinetobacter",
  "Actinomyces", "Actinomyces", "Aeromonas", "Agrococcus", "Alcanivorax",
  "Alkalihalobacillus", "Alloprevotella", "Aminobacterium",
  "Amniculibacterium", "Anaerocolumna", "Anaerocolumna", "Anaerocolumna",
  "Asticcacaulis", "Atopobium", "Bacillus", "Bacillus", "Bacteroidales",
  "Bacteroides", "Bacteroides", "Bacteroides", "Bacteroides", "Bacteroides",
  "Bacteroides", "Barnesiella", "Bifidobacterium", "Blochmannia",
  "Bordetella", "Brevibacillus", "Buchnera", "Burkholderia", "Butyricimonas",
  "Campylobacter", "Campylobacter", "Campylobacter", "Campylobacter",
  "Campylobacter", "Campylobacter", "Campylobacter", "Campylobacter",
  "Campylobacter", "Campylobacter", "Campylobacter", "Campylobacter",
  "Campylobacter", "Capnocytophaga", "Capnocytophaga", "Capnocytophaga",
  "Chroococcidiopsis", "Citrobacter", "Clostridium", "Clostridium",
  "Clostridium", "Clostridium", "Corynebacterium", "Corynebacterium",
  "Corynebacterium", "Corynebacterium", "Cutibacterium", "Dialister",
  "Dolosigranulum", "Enterobacter", "Enterococcus", "Entomoplasma",
  "Escherichia", "Escherichia", "Escherichia", "Eubacterium",
  "Fermentimonas", "Frankia", "Fusobacterium", "Fusobacterium",
  "Fusobacterium", "Fusobacterium", "Fusobacterium", "Fusobacterium",
  "Gemella", "Haemophilus", "Haemophilus", "Halomonas", "Hydrogenophaga",
  "Ilyobacter", "Klebsiella", "Klebsiella", "Klebsiella", "Klebsiella",
  "Klebsiella", "Kocuria", "Kytococcus", "Lachnoanaerobaculum",
  "Lachnospira", "Lachnospiraceae", "Lachnospiraceae", "Lacrimispora",
  "Lactobacillus", "Lactobacillus", "Lactobacillus", "Lactobacillus",
  "Lactobacillus", "Lancefieldella", "Lautropia", "Leptotrichia",
  "Leptotrichia", "Leptotrichia", "Leptotrichia", "Leptotrichia",
  "Leptotrichia", "Leptotrichia", "Leptotrichia", "Ligilactobacillus",
  "Limosilactobacillus", "Luteimonas", "Lysinibacillus", "Lysobacter",
  "Lysobacter", "Lysobacter", "Magnetospirillum", "Marivirga",
  "Megasphaera", "Megasphaera", "Meiothermus", "Methylobacterium",
  "Methylobacterium", "Methylobacterium", "Methylobacterium",
  "Methylobacterium", "Microbacterium", "Microbacterium", "Microbacterium",
  "Microbacterium", "Micrococcus", "Muribaculaceae", "Muribaculum",
  "Muribaculum", "Neisseria", "Neisseria", "Neisseria", "Neisseria",
  "Neisseria", "Neisseria", "Neisseria", "Nocardioides", "Nocardioides",
  "Paludibacter", "Pantoea", "Paracoccus", "Paracoccus", "Paraprevotella",
  "Pasteurella", "Petrimonas", "Phenylobacterium", "Phocaeicola",
  "Phocaeicola", "Phocaeicola", "Phyllobacterium", "Polaribacter",
  "Pontibacter", "Pontibacter", "Porphyromonas", "Porphyromonas",
  "Porphyromonas", "Porphyromonas", "Prevotella", "Prevotella",
  "Prevotella", "Prevotella", "Prevotella", "Prevotella", "Prevotella",
  "Prevotella", "Prevotella", "Prevotella", "Prevotella", "Proteus",
  "Pseudoleptotrichia", "Pseudomonas", "Pseudonocardia", "Pseudonocardia",
  "Raoultella", "Rheinheimera", "Romboutsia", "Roseivirga", "Roseococcus",
  "Rothia", "Rothia", "Rubrobacter", "Rubrobacter", "Rufibacter",
  "Saccharomonospora", "Saccharopolyspora", "Saccharopolyspora",
  "Salinivirga", "Salmonella", "Schaalia", "Sedimentisphaera",
  "Selenomonas", "Selenomonas", "Selenomonas", "Selenomonas", "Selenomonas",
  "Selenomonas", "Shigella", "Skermanella", "Sphingosinicella", "Spirosoma",
  "Staphylococcus", "Staphylococcus", "Stenotrophomonas", "Streptococcus",
  "Streptococcus", "Streptococcus", "Streptococcus", "Streptococcus",
  "Streptococcus", "Streptococcus", "Streptococcus", "Streptococcus",
  "Streptococcus", "Streptococcus", "Streptococcus", "Streptococcus",
  "Streptomyces", "Tannerella", "Tannerella", "Thermovirga", "Treponema",
  "Treponema", "Treponema", "Treponema", "Treponema", "Treponema",
  "Veillonella", "Veillonella", "Veillonella", "Veillonella", "Veillonella"
)


# Look up the higher ranks for every entry of `specieslist` on NCBI.
#
# tax_name() contacts the remote service for each name it is handed, and
# `specieslist` repeats most names many times, so only the distinct names are
# queried and the result is then expanded back to one row per original entry.
library(taxize)

unique_names <- unique(specieslist)
lineage <- tax_name(
  query = unique_names,
  get = c("phylum", "class", "order", "family", "genus"),
  db = "ncbi"
)

# `lineage$query` holds the name each row was looked up with; match() maps
# every original entry (duplicates included) to its row, preserving order.
# The result keeps the name `t` so any later code that uses it still works.
t <- lineage[match(specieslist, lineage$query), ]
rownames(t) <- NULL

Any suggestions, please?

2

There are 2 best solutions below

0
Shaminur On

https://bioinf.shenwei.me/taxonkit/usage/#usage-and-examples

# Resolve every taxid listed in txtid.txt to its lineage; tee both saves the
# result to lineage.txt and echoes it to the terminal.
taxonkit lineage txtid.txt | tee lineage.txt

# Turn the saved lineages into a table with one column per rank:
#   reformat   - appends a fixed-rank, ';'-separated lineage as a new column
#   cut        - keeps the taxid (column 1) and that new lineage (column 3)
#   sep        - splits the lineage column on ';' and drops the unsplit column
#   add-header - names the resulting columns
#   pretty     - aligns the columns for reading
taxonkit reformat lineage.txt \
    | csvtk -H -t cut -f 1,3 \
    | csvtk -H -t sep -f 2 -s ';' -R \
    | csvtk add-header -t -n taxid,kingdom,phylum,class,order,family,genus,species \
    | csvtk pretty -t
0
sckott On

(taxize maintainer here)

Another option, if taxize is too slow for you, is taxizedb. By default it uses NCBI as the data source. taxizedb is similar to taxize but uses local database dumps instead of making HTTP requests; the trade-off is the initial setup time needed to download the databases.

# Install taxizedb only when it is missing, so re-running the script does not
# reinstall the package every time.
if (!requireNamespace("taxizedb", quietly = TRUE)) {
  install.packages("taxizedb")
}
library(taxizedb)

# `x` is a placeholder for your character vector of taxon names.
ids <- name2taxid(x, out_type = "summary")

# Returns one lineage table per taxid, from which the wanted ranks can be
# picked out.
classification(ids$id)

Then you can pull out whatever ranks you want from each data.frame