Query Kew's Tree of Life for specimens that have been sampled for sequencing.
search_tol(query = "", genes = FALSE, limit = 50, page = 1, .wait = 0.2)
The string to query the database with.
Set to TRUE to download results for genes instead of specimens.
An integer specifying the number of results to return.
An integer specifying the page of results to request.
Time to wait before making a request, to help with rate limiting.
Returns an object of class tol_search
that is a simple
structure with slots for:
total
: the total number of results held in ToL for the query.
page
: the page of results requested.
limit
: the maximum number of results requested from the API.
results
: the query results parsed into a list.
query
: the query string submitted to the API.
response
: the httr response object.
The Tree of Life is a database of specimens sequenced as part of Kew's efforts to build a comprehensive evolutionary tree of life for flowering plants.
The search API allows users to query the database for specimens
based on their taxonomic information. Filtering and keyword-search
are not currently implemented. All searches are based on taxonomic
information, so "Myrcia" and "Myrtales" will return results, but "Brummitt" will not.
The search API also allows users to download information about sequenced
genes. There is currently no ability to search within the results for genes,
but a table of all genes can be accessed using the keyword argument genes=TRUE.
Baker W.J., Bailey P., Barber V., Barker A., Bellot S., Bishop D., Botigue L.R., Brewer G., Carruthers T., Clarkson J.J., Cook J., Cowan R.S., Dodsworth S., Epitawalage N., Francoso E., Gallego B., Johnson M., Kim J.T., Leempoel K., Maurin O., McGinnie C., Pokorny L., Roy S., Stone M., Toledo E., Wickett N.J., Zuntini A.R., Eiserhardt W.L., Kersey P.J., Leitch I.J. & Forest F. 2021. A Comprehensive Phylogenomic Platform for Exploring the Angiosperm Tree of Life. Systematic Biology, 2021; syab035, https://doi.org/10.1093/sysbio/syab035
# get the first 50 of all sequenced specimens
search_tol(limit=50)
#> No encoding supplied: defaulting to UTF-8.
#> <ToL search: ''>
#> total results: 3109
#> returned results: 50
#> total pages: 63
#> current page: 1
#> List of 1
#> $ :List of 20
#> ..$ age : int 2013
#> ..$ collector : chr "Burgt, X.M. van der"
#> ..$ collector_no : chr "1721"
#> ..$ country : NULL
#> ..$ fasta_file_url : chr "http://sftp.kew.org/pub/paftol/current_release/fasta/by_recovery/INSDC.ERR5034798.Afrofittonia_silvestris.a353.fasta"
#> ..$ gene_stats :List of 2
#> ..$ genus :List of 3
#> ..$ herbcat_url : chr "https://apps.kew.org/herbcat/detailsQuery.do?barcode=K001061989"
#> ..$ id : int 3093
#> ..$ is_suspicious_placement: logi FALSE
#> ..$ material_source :List of 2
#> ..$ museum_barcode : chr "K001061989"
#> ..$ project :List of 2
#> ..$ raw_reads :List of 1
#> ..$ sequence_id : int 5904
#> ..$ species :List of 2
#> ..$ specimen_reference : chr "Burgt, X.M. van der 1721 (K)"
#> ..$ specimen_source : chr "Royal Botanic Gardens Kew"
#> ..$ taxonomy :List of 4
#> ..$ voucher_no : NULL
# search for all sequenced Myrcia specimens
search_tol("Myrcia")
#> No encoding supplied: defaulting to UTF-8.
#> <ToL search: 'Myrcia'>
#> total results: 17
#> returned results: 17
#> total pages: 1
#> current page: 1
#> List of 1
#> $ :List of 20
#> ..$ age : NULL
#> ..$ collector : chr "Lima, D. F."
#> ..$ collector_no : chr "504"
#> ..$ country : NULL
#> ..$ fasta_file_url : chr "http://sftp.kew.org/pub/paftol/current_release/fasta/by_recovery/INSDC.ERR5034774.Myrcia_albotomentosa.a353.fasta"
#> ..$ gene_stats :List of 2
#> ..$ genus :List of 3
#> ..$ herbcat_url : NULL
#> ..$ id : int 2717
#> ..$ is_suspicious_placement: logi FALSE
#> ..$ material_source :List of 2
#> ..$ museum_barcode : NULL
#> ..$ project :List of 2
#> ..$ raw_reads :List of 1
#> ..$ sequence_id : int 5528
#> ..$ species :List of 2
#> ..$ specimen_reference : chr "Lima, D. F. 504 (K)"
#> ..$ specimen_source : chr "RBGKew DNA Bank"
#> ..$ taxonomy :List of 4
#> ..$ voucher_no : NULL
# get all sequenced specimens
search_tol(limit=5000)
#> No encoding supplied: defaulting to UTF-8.
#> <ToL search: ''>
#> total results: 3109
#> returned results: 3109
#> total pages: 1
#> current page: 1
#> List of 1
#> $ :List of 20
#> ..$ age : int 2013
#> ..$ collector : chr "Burgt, X.M. van der"
#> ..$ collector_no : chr "1721"
#> ..$ country : NULL
#> ..$ fasta_file_url : chr "http://sftp.kew.org/pub/paftol/current_release/fasta/by_recovery/INSDC.ERR5034798.Afrofittonia_silvestris.a353.fasta"
#> ..$ gene_stats :List of 2
#> ..$ genus :List of 3
#> ..$ herbcat_url : chr "https://apps.kew.org/herbcat/detailsQuery.do?barcode=K001061989"
#> ..$ id : int 3093
#> ..$ is_suspicious_placement: logi FALSE
#> ..$ material_source :List of 2
#> ..$ museum_barcode : chr "K001061989"
#> ..$ project :List of 2
#> ..$ raw_reads :List of 1
#> ..$ sequence_id : int 5904
#> ..$ species :List of 2
#> ..$ specimen_reference : chr "Burgt, X.M. van der 1721 (K)"
#> ..$ specimen_source : chr "Royal Botanic Gardens Kew"
#> ..$ taxonomy :List of 4
#> ..$ voucher_no : NULL
# search for a species name and print the results
r <- search_tol("Myrcia guianensis")
#> No encoding supplied: defaulting to UTF-8.
print(r)
#> <ToL search: 'Myrcia guianensis'>
#> total results: 1
#> returned results: 1
#> total pages: 1
#> current page: 1
#> List of 1
#> $ :List of 20
#> ..$ age : NULL
#> ..$ collector : chr "Holst, B. K."
#> ..$ collector_no : chr "9389"
#> ..$ country : NULL
#> ..$ fasta_file_url : chr "http://sftp.kew.org/pub/paftol/current_release/fasta/by_recovery/INSDC.ERR5033709.Myrcia_guianensis.a353.fasta"
#> ..$ gene_stats :List of 2
#> ..$ genus :List of 3
#> ..$ herbcat_url : NULL
#> ..$ id : int 2699
#> ..$ is_suspicious_placement: logi FALSE
#> ..$ material_source :List of 2
#> ..$ museum_barcode : NULL
#> ..$ project :List of 2
#> ..$ raw_reads :List of 1
#> ..$ sequence_id : int 5510
#> ..$ species :List of 2
#> ..$ specimen_reference : chr "Holst, B. K. 9389 (K)"
#> ..$ specimen_source : chr "RBGKew DNA Bank"
#> ..$ taxonomy :List of 4
#> ..$ voucher_no : NULL
# simplify search results to a `tibble`
r <- search_tol("Myrcia")
#> No encoding supplied: defaulting to UTF-8.
tidy(r)
#> # A tibble: 17 × 20
#> age collector collector_no country fasta_file_url gene_stats genus
#> <int> <chr> <chr> <lgl> <chr> <list> <lis>
#> 1 NA Lima, D. F. 504 NA http://sftp.kew.or… <tibble [1… <tib…
#> 2 NA Prevost 4751 NA http://sftp.kew.or… <tibble [1… <tib…
#> 3 NA Lucas, E. J. 71 NA http://sftp.kew.or… <tibble [1… <tib…
#> 4 NA Lucas, E. J. 254 NA http://sftp.kew.or… <tibble [1… <tib…
#> 5 NA Holst, B. K. 9389 NA http://sftp.kew.or… <tibble [1… <tib…
#> 6 NA Mazine 1052 NA http://sftp.kew.or… <tibble [1… <tib…
#> 7 2012 Simon, M.F. 1689 NA http://sftp.kew.or… <tibble [1… <tib…
#> 8 NA Lucas, E. J. 86 NA http://sftp.kew.or… <tibble [1… <tib…
#> 9 NA Harley 50309 NA http://sftp.kew.or… <tibble [1… <tib…
#> 10 NA Lucas, E. J. 98 NA http://sftp.kew.or… <tibble [1… <tib…
#> 11 2006 Lucas, E. J. 930 NA http://sftp.kew.or… <tibble [1… <tib…
#> 12 NA Lucas, E. J. 217 NA http://sftp.kew.or… <tibble [1… <tib…
#> 13 NA Faria 1582 NA http://sftp.kew.or… <tibble [1… <tib…
#> 14 NA Lucas, E. 73 NA http://sftp.kew.or… <tibble [1… <tib…
#> 15 2010 Savassi ESA 85681 NA http://sftp.kew.or… <tibble [1… <tib…
#> 16 2009 Holst, B. K. 8862 NA http://sftp.kew.or… <tibble [1… <tib…
#> 17 2009 Lucas, E. J. 480 NA http://sftp.kew.or… <tibble [1… <tib…
#> # … with 13 more variables: herbcat_url <chr>, id <int>,
#> # is_suspicious_placement <lgl>, material_source <list>,
#> # museum_barcode <chr>, project <list>, raw_reads <list>, sequence_id <int>,
#> # species <list>, specimen_reference <chr>, specimen_source <chr>,
#> # taxonomy <list>, voucher_no <lgl>
# gene stats are nested in the results
r <- search_tol("Myrcia")
#> No encoding supplied: defaulting to UTF-8.
tidied <- tidy(r)
tidyr::unnest(tidied, cols=gene_stats)
#> # A tibble: 17 × 21
#> age collector collector_no country fasta_file_url bp_recovered
#> <int> <chr> <chr> <lgl> <chr> <int>
#> 1 NA Lima, D. F. 504 NA http://sftp.kew.org/pub… 154482
#> 2 NA Prevost 4751 NA http://sftp.kew.org/pub… 155409
#> 3 NA Lucas, E. J. 71 NA http://sftp.kew.org/pub… 173061
#> 4 NA Lucas, E. J. 254 NA http://sftp.kew.org/pub… 164862
#> 5 NA Holst, B. K. 9389 NA http://sftp.kew.org/pub… 145281
#> 6 NA Mazine 1052 NA http://sftp.kew.org/pub… 178383
#> 7 2012 Simon, M.F. 1689 NA http://sftp.kew.org/pub… 144168
#> 8 NA Lucas, E. J. 86 NA http://sftp.kew.org/pub… 159009
#> 9 NA Harley 50309 NA http://sftp.kew.org/pub… 170181
#> 10 NA Lucas, E. J. 98 NA http://sftp.kew.org/pub… 158568
#> 11 2006 Lucas, E. J. 930 NA http://sftp.kew.org/pub… 171699
#> 12 NA Lucas, E. J. 217 NA http://sftp.kew.org/pub… 136671
#> 13 NA Faria 1582 NA http://sftp.kew.org/pub… 176967
#> 14 NA Lucas, E. 73 NA http://sftp.kew.org/pub… 173994
#> 15 2010 Savassi ESA 85681 NA http://sftp.kew.org/pub… 178776
#> 16 2009 Holst, B. K. 8862 NA http://sftp.kew.org/pub… 161097
#> 17 2009 Lucas, E. J. 480 NA http://sftp.kew.org/pub… 175335
#> # … with 15 more variables: genes_recovered <int>, genus <list>,
#> # herbcat_url <chr>, id <int>, is_suspicious_placement <lgl>,
#> # material_source <list>, museum_barcode <chr>, project <list>,
#> # raw_reads <list>, sequence_id <int>, species <list>,
#> # specimen_reference <chr>, specimen_source <chr>, taxonomy <list>,
#> # voucher_no <lgl>
# species names are nested in the results
r <- search_tol("Myrcia")
#> No encoding supplied: defaulting to UTF-8.
tidied <- tidy(r)
tidyr::unnest(tidied, cols=species, names_sep="_")
#> # A tibble: 17 × 21
#> age collector collector_no country fasta_file_url gene_stats genus
#> <int> <chr> <chr> <lgl> <chr> <list> <lis>
#> 1 NA Lima, D. F. 504 NA http://sftp.kew.or… <tibble [1… <tib…
#> 2 NA Prevost 4751 NA http://sftp.kew.or… <tibble [1… <tib…
#> 3 NA Lucas, E. J. 71 NA http://sftp.kew.or… <tibble [1… <tib…
#> 4 NA Lucas, E. J. 254 NA http://sftp.kew.or… <tibble [1… <tib…
#> 5 NA Holst, B. K. 9389 NA http://sftp.kew.or… <tibble [1… <tib…
#> 6 NA Mazine 1052 NA http://sftp.kew.or… <tibble [1… <tib…
#> 7 2012 Simon, M.F. 1689 NA http://sftp.kew.or… <tibble [1… <tib…
#> 8 NA Lucas, E. J. 86 NA http://sftp.kew.or… <tibble [1… <tib…
#> 9 NA Harley 50309 NA http://sftp.kew.or… <tibble [1… <tib…
#> 10 NA Lucas, E. J. 98 NA http://sftp.kew.or… <tibble [1… <tib…
#> 11 2006 Lucas, E. J. 930 NA http://sftp.kew.or… <tibble [1… <tib…
#> 12 NA Lucas, E. J. 217 NA http://sftp.kew.or… <tibble [1… <tib…
#> 13 NA Faria 1582 NA http://sftp.kew.or… <tibble [1… <tib…
#> 14 NA Lucas, E. 73 NA http://sftp.kew.or… <tibble [1… <tib…
#> 15 2010 Savassi ESA 85681 NA http://sftp.kew.or… <tibble [1… <tib…
#> 16 2009 Holst, B. K. 8862 NA http://sftp.kew.or… <tibble [1… <tib…
#> 17 2009 Lucas, E. J. 480 NA http://sftp.kew.or… <tibble [1… <tib…
#> # … with 14 more variables: herbcat_url <chr>, id <int>,
#> # is_suspicious_placement <lgl>, material_source <list>,
#> # museum_barcode <chr>, project <list>, raw_reads <list>, sequence_id <int>,
#> # species_id <int>, species_name <chr>, specimen_reference <chr>,
#> # specimen_source <chr>, taxonomy <list>, voucher_no <lgl>
# as is higher taxonomy
r <- search_tol("Myrcia")
#> No encoding supplied: defaulting to UTF-8.
tidied <- tidy(r)
tidyr::unnest(tidied, cols=species, names_sep="_")
#> # A tibble: 17 × 21
#> age collector collector_no country fasta_file_url gene_stats genus
#> <int> <chr> <chr> <lgl> <chr> <list> <lis>
#> 1 NA Lima, D. F. 504 NA http://sftp.kew.or… <tibble [1… <tib…
#> 2 NA Prevost 4751 NA http://sftp.kew.or… <tibble [1… <tib…
#> 3 NA Lucas, E. J. 71 NA http://sftp.kew.or… <tibble [1… <tib…
#> 4 NA Lucas, E. J. 254 NA http://sftp.kew.or… <tibble [1… <tib…
#> 5 NA Holst, B. K. 9389 NA http://sftp.kew.or… <tibble [1… <tib…
#> 6 NA Mazine 1052 NA http://sftp.kew.or… <tibble [1… <tib…
#> 7 2012 Simon, M.F. 1689 NA http://sftp.kew.or… <tibble [1… <tib…
#> 8 NA Lucas, E. J. 86 NA http://sftp.kew.or… <tibble [1… <tib…
#> 9 NA Harley 50309 NA http://sftp.kew.or… <tibble [1… <tib…
#> 10 NA Lucas, E. J. 98 NA http://sftp.kew.or… <tibble [1… <tib…
#> 11 2006 Lucas, E. J. 930 NA http://sftp.kew.or… <tibble [1… <tib…
#> 12 NA Lucas, E. J. 217 NA http://sftp.kew.or… <tibble [1… <tib…
#> 13 NA Faria 1582 NA http://sftp.kew.or… <tibble [1… <tib…
#> 14 NA Lucas, E. 73 NA http://sftp.kew.or… <tibble [1… <tib…
#> 15 2010 Savassi ESA 85681 NA http://sftp.kew.or… <tibble [1… <tib…
#> 16 2009 Holst, B. K. 8862 NA http://sftp.kew.or… <tibble [1… <tib…
#> 17 2009 Lucas, E. J. 480 NA http://sftp.kew.or… <tibble [1… <tib…
#> # … with 14 more variables: herbcat_url <chr>, id <int>,
#> # is_suspicious_placement <lgl>, material_source <list>,
#> # museum_barcode <chr>, project <list>, raw_reads <list>, sequence_id <int>,
#> # species_id <int>, species_name <chr>, specimen_reference <chr>,
#> # specimen_source <chr>, taxonomy <list>, voucher_no <lgl>
# search for all gene entries and print results
r <- search_tol(genes=TRUE, limit=500)
#> No encoding supplied: defaulting to UTF-8.
print(r)
#> <ToL search: 'genes'>
#> total results: 353
#> returned results: 353
#> total pages: 1
#> current page: 1
#> List of 1
#> $ :List of 16
#> ..$ alignment_file_url : chr "http://sftp.kew.org/pub/paftol/current_release/fasta/alignments/5328.dna.aln.fasta"
#> ..$ average_contig_length : num 493
#> ..$ average_contig_length_percentage: num 86.3
#> ..$ exemplar_accession : chr "Q8GWR1"
#> ..$ exemplar_hyperlink : chr "https://www.uniprot.org/uniprot/Q8GWR1\n"
#> ..$ exemplar_name : chr "AAAS"
#> ..$ exemplar_species : chr "Arabidopsis thaliana (Mouse-ear cress)"
#> ..$ fasta_file_url : chr "http://sftp.kew.org/pub/paftol/current_release/fasta/by_gene/5328.dna.fasta"
#> ..$ genera_count : int 2294
#> ..$ id : int 51
#> ..$ internal_name : chr "5328"
#> ..$ newick_file : chr " "
#> ..$ newick_file_path_name : chr " "
#> ..$ sequence_count : int 3041
#> ..$ species_count : int 2905
#> ..$ tree_file_url : chr "http://sftp.kew.org/pub/paftol/current_release/tree/gene/5328.tree"
# tidy the returned genes
tidy(r)
#> # A tibble: 353 × 16
#> alignment_file_url average_contig_l… average_contig_l… exemplar_access…
#> <chr> <dbl> <dbl> <chr>
#> 1 http://sftp.kew.org/pub… 493. 86.3 Q8GWR1
#> 2 http://sftp.kew.org/pub… 584. 51.7 Q8H1R4
#> 3 http://sftp.kew.org/pub… 453. 55.2 Q8LEF6
#> 4 http://sftp.kew.org/pub… 487. 55.1 Q9FZ49
#> 5 http://sftp.kew.org/pub… 645. 60.8 P04747
#> 6 http://sftp.kew.org/pub… 790. 68.4 Q9ZUC1
#> 7 http://sftp.kew.org/pub… 641. 53.6 Q8VY89
#> 8 http://sftp.kew.org/pub… 969. 65.2 Q9LRZ3
#> 9 http://sftp.kew.org/pub… 344. 67.4 Q9FIG9
#> 10 http://sftp.kew.org/pub… 612. 90.4 F4JUL9
#> # … with 343 more rows, and 12 more variables: exemplar_hyperlink <chr>,
#> # exemplar_name <chr>, exemplar_species <chr>, fasta_file_url <chr>,
#> # genera_count <int>, id <int>, internal_name <chr>, newick_file <chr>,
#> # newick_file_path_name <chr>, sequence_count <int>, species_count <int>,
#> # tree_file_url <chr>