Find how taxa changed through the cleaning/filtering/tidying process

find_taxa does not work hierarchically. For example, Eucalyptus will only match genus-level records, not species-level records such as Eucalyptus leucoxylon.

find_taxa(
  taxa_to_find,
  taxa_cols = c("original_name", "taxa"),
  lutaxa,
  filt_df_prefix = "flor_"
)

Arguments

taxa_to_find: Character. Taxa name to find.
taxa_cols: Character. Name of column(s) across data frames containing taxa information.
lutaxa: Data frame with column names matching taxa_cols; typically taxonomy$species$lutaxa, as returned by make_taxonomy().
filt_df_prefix: Character. Prefix shared by the names of the data frames created at each step of the filtering process (e.g. "flor_" for flor_start, flor_single, ...).

Examples



# library(envClean)

# Example taxa
use_taxa <- "Eucalyptus gracilis"

# Set context: the columns that together define a context
context <- c("lat", "long", "month", "year")

# Start: convert flor_all to a tibble and add a time stamp
flor_start <- flor_all %>%
  tibble::as_tibble() %>%
  envFunc::add_time_stamp()

# Remove singletons: apply filter_counts() over the context columns with a
# threshold of 5, then add a time stamp
flor_single <- flor_start %>%
  filter_counts(context = context
                , thresh = 5
                ) %>%
  envFunc::add_time_stamp()

# Just keep the most recent context at each location, i.e. within each
# combination of the context columns other than month and year.
# Year is filtered before month so that month is only compared within the
# latest year. Testing both against their group-wide maxima in a single
# filter() drops every record for a location whose latest year does not also
# contain the largest month (e.g. visits in 12/2019 and 3/2021).
flor_recent <- flor_single %>%
  dplyr::group_by(
    dplyr::across(
      tidyselect::any_of(setdiff(context, c("month", "year")))
    )
  ) %>%
  dplyr::filter(year == max(year)) %>%
  dplyr::filter(month == max(month)) %>%
  dplyr::ungroup() %>%
  envFunc::add_time_stamp()

# make_taxonomy: build the taxonomy for the retained records. The result is
# used below as taxa$species (binning) and taxa$species$lutaxa (lookup).
taxa <- make_taxonomy(df = flor_recent)
#> Joining with `by = join_by(original_name)`
#> galah: version 2.0.2
#> ℹ Default node set to ALA (ala.org.au).
#> ℹ See all supported GBIF nodes with `show_all(atlases)`.
#> ℹ To change nodes, use e.g. `galah_config(atlas = "GBIF")`.
#> Querying API ■■■                                5% |  ETA: 20s
#> Querying API ■■■                                6% |  ETA: 19s
#> Querying API ■■■                                8% |  ETA: 18s
#> Querying API ■■■■                               9% |  ETA: 18s
#> Querying API ■■■■                              11% |  ETA: 17s
#> Querying API ■■■■■                             12% |  ETA: 17s
#> Querying API ■■■■■                             13% |  ETA: 16s
#> Querying API ■■■■■                             14% |  ETA: 16s
#> Querying API ■■■■■■                            15% |  ETA: 16s
#> Querying API ■■■■■■                            17% |  ETA: 15s
#> Querying API ■■■■■■                            18% |  ETA: 15s
#> Querying API ■■■■■■■                           19% |  ETA: 15s
#> Querying API ■■■■■■■                           20% |  ETA: 15s
#> Querying API ■■■■■■■                           21% |  ETA: 14s
#> Querying API ■■■■■■■■                          22% |  ETA: 14s
#> Querying API ■■■■■■■■                          23% |  ETA: 14s
#> Querying API ■■■■■■■■                          25% |  ETA: 14s
#> Querying API ■■■■■■■■■                         26% |  ETA: 13s
#> Querying API ■■■■■■■■■                         27% |  ETA: 13s
#> Querying API ■■■■■■■■■■                        29% |  ETA: 13s
#> Querying API ■■■■■■■■■■                        30% |  ETA: 12s
#> Querying API ■■■■■■■■■■                        31% |  ETA: 12s
#> Querying API ■■■■■■■■■■■                       33% |  ETA: 12s
#> Querying API ■■■■■■■■■■■                       34% |  ETA: 11s
#> Querying API ■■■■■■■■■■■■                      35% |  ETA: 11s
#> Querying API ■■■■■■■■■■■■                      37% |  ETA: 11s
#> Querying API ■■■■■■■■■■■■                      38% |  ETA: 11s
#> Querying API ■■■■■■■■■■■■■                     39% |  ETA: 11s
#> Querying API ■■■■■■■■■■■■■                     41% |  ETA: 10s
#> Querying API ■■■■■■■■■■■■■■                    42% |  ETA: 10s
#> Querying API ■■■■■■■■■■■■■■                    43% |  ETA: 10s
#> Querying API ■■■■■■■■■■■■■■                    45% |  ETA: 10s
#> Querying API ■■■■■■■■■■■■■■■                   46% |  ETA:  9s
#> Querying API ■■■■■■■■■■■■■■■                   47% |  ETA:  9s
#> Querying API ■■■■■■■■■■■■■■■■                  49% |  ETA:  9s
#> Querying API ■■■■■■■■■■■■■■■■                  50% |  ETA:  9s
#> Querying API ■■■■■■■■■■■■■■■■                  51% |  ETA:  8s
#> Querying API ■■■■■■■■■■■■■■■■■                 52% |  ETA:  8s
#> Querying API ■■■■■■■■■■■■■■■■■                 54% |  ETA:  8s
#> Querying API ■■■■■■■■■■■■■■■■■■                55% |  ETA:  8s
#> Querying API ■■■■■■■■■■■■■■■■■■                56% |  ETA:  7s
#> Querying API ■■■■■■■■■■■■■■■■■■                58% |  ETA:  7s
#> Querying API ■■■■■■■■■■■■■■■■■■■               59% |  ETA:  7s
#> Querying API ■■■■■■■■■■■■■■■■■■■               60% |  ETA:  7s
#> Querying API ■■■■■■■■■■■■■■■■■■■               62% |  ETA:  6s
#> Querying API ■■■■■■■■■■■■■■■■■■■■              63% |  ETA:  6s
#> Querying API ■■■■■■■■■■■■■■■■■■■■              64% |  ETA:  6s
#> Querying API ■■■■■■■■■■■■■■■■■■■■■             66% |  ETA:  6s
#> Querying API ■■■■■■■■■■■■■■■■■■■■■             67% |  ETA:  6s
#> Querying API ■■■■■■■■■■■■■■■■■■■■■             68% |  ETA:  5s
#> Querying API ■■■■■■■■■■■■■■■■■■■■■■            69% |  ETA:  5s
#> Querying API ■■■■■■■■■■■■■■■■■■■■■■            71% |  ETA:  5s
#> Querying API ■■■■■■■■■■■■■■■■■■■■■■■           72% |  ETA:  5s
#> Querying API ■■■■■■■■■■■■■■■■■■■■■■■           74% |  ETA:  4s
#> Querying API ■■■■■■■■■■■■■■■■■■■■■■■■          75% |  ETA:  4s
#> Querying API ■■■■■■■■■■■■■■■■■■■■■■■■          77% |  ETA:  4s
#> Querying API ■■■■■■■■■■■■■■■■■■■■■■■■          78% |  ETA:  4s
#> Querying API ■■■■■■■■■■■■■■■■■■■■■■■■■         79% |  ETA:  3s
#> Querying API ■■■■■■■■■■■■■■■■■■■■■■■■■         81% |  ETA:  3s
#> Querying API ■■■■■■■■■■■■■■■■■■■■■■■■■■        82% |  ETA:  3s
#> Querying API ■■■■■■■■■■■■■■■■■■■■■■■■■■        83% |  ETA:  3s
#> Querying API ■■■■■■■■■■■■■■■■■■■■■■■■■■        85% |  ETA:  2s
#> Querying API ■■■■■■■■■■■■■■■■■■■■■■■■■■■       86% |  ETA:  2s
#> Querying API ■■■■■■■■■■■■■■■■■■■■■■■■■■■       88% |  ETA:  2s
#> Querying API ■■■■■■■■■■■■■■■■■■■■■■■■■■■■      89% |  ETA:  2s
#> Querying API ■■■■■■■■■■■■■■■■■■■■■■■■■■■■      90% |  ETA:  2s
#> Querying API ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■     92% |  ETA:  1s
#> Querying API ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■     93% |  ETA:  1s
#> Querying API ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■     94% |  ETA:  1s
#> Querying API ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■    96% |  ETA:  1s
#> Querying API ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■    97% |  ETA:  0s
#> Querying API ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■   98% |  ETA:  0s
#> Matched 264 of 265 taxonomic search terms in selected atlas (Australia).
#> 1 unmatched search term:
#> • "Kali tragus"
#> 
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`
#> saving results to H:/temp/nige\RtmpYfG5ks\file4c806ecf360e.parquet
#> The following were completely unmatched: Kali tragus. Consider providing more taxonomic levels, or an override, for each unmatched taxa?
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`

# bin taxa: apply bin_taxa() to the most recent records, using taxa$species
# from the make_taxonomy() result
flor_taxa <- flor_recent %>%
  bin_taxa(taxonomy = taxa$species)
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(taxa)`
#> Joining with `by = join_by(original_name)`

# distinct over bins: one row per unique combination of taxa and whichever
# context columns are present
flor_bin <- dplyr::distinct(
  flor_taxa,
  taxa,
  dplyr::across(tidyselect::any_of(context))
)

# How did records of 'taxa' change through the filtering?
# Eucalyptus gracilis (the example taxa stored in use_taxa)
find_taxa(use_taxa
          , lutaxa = taxa$species$lutaxa
          )
#> # A tibble: 0 × 9
#> # ℹ 9 variables: name <chr>, obj <list>, has_stamp <lgl>, nrow <dbl>,
#> #   ctime <???>, founds <list>, taxa <chr>, records <dbl>, found <chr>

# Same query for a second taxa: Chenopodium nutans
find_taxa(
  taxa_to_find = "Chenopodium nutans",
  lutaxa = taxa$species$lutaxa
)
#> # A tibble: 0 × 9
#> # ℹ 9 variables: name <chr>, obj <list>, has_stamp <lgl>, nrow <dbl>,
#> #   ctime <???>, founds <list>, taxa <chr>, records <dbl>, found <chr>