Setup

This page explains the setup code defined in XX_setup.R, which is sourced at the start of each workflow step.

Packages

# Motus data and databases
library(motus)         # Fetch motus data
library(DBI)           # Databases
library(arrow)         # Feather files

# Tidy data manipulation
library(purrr)         # Loops
library(furrr)         # Parallel loops
library(dplyr)         # Manipulate data
library(tidyr)         # Transform data
library(tibble)        # Data frames (ish)
library(stringr)       # Text manipulation/search
library(readr)         # Read data
library(lubridate)     # Deal with date/times
library(assertr)       # Check data quality

# Data display
library(ggplot2)       # Figures
library(ggrepel)       # Push labels away from points in figures
library(patchwork)     # Combine figures
library(gt)            # HTML tables

# Spatial data
library(sf)            # Work with spatial data
library(rnaturalearth) # Get maps
library(ebirdst)       # Fetch species ranges
library(units)         # Deal with units of distance and speed
library(ggspatial)     # Plot spatial data
library(lutz)          # Timezones by location

Motus credentials
Here we set the credentials for this session (so you don’t have to type them in manually every time).

This code expects that you store your Motus login and password as URBAN_USER and URBAN_PASSWORD in an .Renviron file.

- Learn more about this file: https://rstats.wtf/r-startup.html#renviron
- Easily edit this file with usethis::edit_r_environ()
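For reference, the .Renviron entries would look something like this (the values below are placeholders, not real credentials):

URBAN_USER=your_motus_login
URBAN_PASSWORD=your_motus_password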
motus:::sessionVariable(name = "userLogin", val = Sys.getenv("URBAN_USER"))
motus:::sessionVariable(name = "userPassword", val = Sys.getenv("URBAN_PASSWORD"))
Define projects
Here we’re using a small set of pilot projects:

- Urban Ecology of Birds in the Lower Mainland of BC #484
- Montreal Urban Forests #551 (191 tags, but 62 on resident cardinals)
- Montreal Connectivity #373 (105 tags)
- Montreal moult migrant research #168 (113 tags)

Additionally, we’re using open Motus projects selected via Selecting Projects.
projects <- c(484, 551, 373, 168,                 # From Barbara and Elizabeth
              352, 364, 393, 417, 464, 515, 607)  # Open Motus projects
projects <- setNames(projects, projects)
Define constants
Here we define any cutoffs or constants we will be using.

- max_flight_speed is the maximum speed of travel we think is possible (with a buffer)
- max_tower_dist is the maximum distance between towers/receivers within which a bird could be detected at both towers (and thus flight speed has no bearing)
- bout_cutoff is the maximum amount of time between two runs at a specific station before those runs will be considered separate detection bouts
max_flight_speed <- set_units(72, "m/s")
max_tower_dist   <- set_units(50, "km")
bout_cutoff      <- set_units(30, "min")
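Because these are units objects, computed quantities can be compared directly against the cutoffs, with unit conversion handled automatically. A made-up example (values invented purely for illustration):

# Hypothetical transition: ~100 km covered in 20 min
speed <- set_units(100, "km") / set_units(20, "min")
speed > max_flight_speed  # TRUE (~83 m/s), so this transition would be flagged as too fast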
Data

Species

Get species lists from the NatureCounts taxonomy and join these with the species metadata from the Motus databases. We only need to do this for one database, as the metadata() step in Download/Update data adds full species lists to all databases (so metadata is the same across all databases).

Databases

Load existing databases

dbs <- map(projects, \(x) tagme(x, dir = "Data/01_Raw", update = FALSE))
Establish included species
- We match existing species in the databases to the species list from NatureCounts
- We rename scientific names to omit subspecies (e.g., myrtle yellow-rumped warbler) [required for later eBird range steps]
# naturecounts::nc_metadata() # Update naturecounts taxonomy lists as needed
species_list <- naturecounts::meta_species_taxonomy() |>
  filter(order_taxon %in% c("Passeriformes", "Piciformes"))

sp <- tbl(dbs[[1]], "tagDeps") |>
  # Only get species which are in our specific projects
  filter(projectID %in% projects) |>
  semi_join(x = tbl(dbs[[1]], "species"), y = _,
            by = c("id" = "speciesID")) |>
  select(-"group", -"sort") |>
  distinct() |>
  collect() |>
  # Keep only chosen species
  filter(id %in% species_list$species_id) |>
  arrange(id) |>
  # Fix subspecies names for joining with eBird (e.g., myrtle yellow-rumped warbler)
  mutate(
    scientific_motus = scientific,
    scientific = str_replace(scientific, "Setophaga coronata coronata", "Setophaga coronata"))
Functions
These are the custom functions used in this workflow. Some functions are displayed in the various workflow pages, but they are all sourced from XX_functions.R
(as is the following).
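A minimal sketch of how a workflow script would load these helpers (an assumed pattern, not shown in this section):

source("XX_functions.R")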
# Delete rows with the given IDs from a database table (if the table exists)
remove_by_ID <- function(src, t, id_type = "batchID", ids) {
  if(length(ids) > 0) {
    if(t %in% DBI::dbListTables(src)) {
      n <- DBI::dbExecute(
        src,
        glue::glue("DELETE FROM {`t`} WHERE {`id_type`} IN (",
                   glue::glue_collapse(ids, sep = ', '),
                   ")"))
      if(n > 0) message(msg_fmt(" {n} rows deleted from {t}"))
    }
  }
}
# Count runs in each project database before and after applying a filter
filter_count <- function(dbs, filter) {
  before <- map(dbs, \(x) tbl(x, "runs") |> count(name = "n_before") |> collect()) |>
    list_rbind(names_to = "proj_id")

  after <- imap(dbs, \(x, y) {
    remove <- getRunsFilters(x, filter, y)
    tbl(x, "runs") |>
      anti_join(remove, by = c("runID", "motusTagID")) |>
      count(name = "n_after") |>
      collect()
  }) |>
    list_rbind()

  bind_cols(before, after) |>
    mutate(p_reduced = (n_before - n_after) / n_before * 100)
}
# Common theme for gt tables
gt_theme <- function(data, ...) {
  data |>
    tab_options(
      table.border.top.width = px(3),
      column_labels.font.weight = "bold",
      column_labels.border.bottom.width = px(3),
      data_row.padding = px(7),
      ...
    )
}
# Collect and transform ts values to date/times
collect_ts <- function(db) {
  db |>
    collect() |>
    mutate(across(contains("ts", ignore.case = FALSE), as_datetime))
}
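For example, a hypothetical call on one of the databases loaded above (recvDeps is one of the Motus tables with ts* timestamp columns):

collect_ts(tbl(dbs[[1]], "recvDeps"))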
# Collect run/tag information for filters
collect_filter <- function(db) {
  db |>
    rename_with(~"motusTagID", .cols = any_of("tagID")) |>
    select(runID, motusTagID) |>
    mutate(probability = 0) |>
    collect()
}
# Quicker anti-join for database tables, via a left join on a marker column
anti_join_quick <- function(x, y, by = NULL, copy = FALSE, motus_filter = TRUE) {
  if(motus_filter) {
    y <- select(y, "runID", "tagID")
    if(is.null(by)) by <- c("runID", "tagID")
  }
  y <- mutate(y, ANTI = 1)
  left_join(x, y, by = by, copy = copy) |>
    filter(is.na(ANTI)) |>
    select(-"ANTI")
}
# ---- custom_runs ----
custom_runs <- function(db) {
  # Replacement for NULL tsEnd values (i.e. today plus change)
  max_ts <- round(as.numeric(Sys.time()) + 1000)

  # Get Receivers
  r <- tbl(db, "recvDeps") |>
    select("deviceID" = "deviceID", "recvDeployID" = "deployID",
           "tsStartRecv" = "tsStart", "tsEndRecv" = "tsEnd",
           "recvType" = "receiverType",
           "recvDeployLat" = "latitude", "recvDeployLon" = "longitude") |>
    mutate(tsEndRecv = if_else(is.na(tsEndRecv), max_ts, tsEndRecv))

  # Get tags
  t <- tbl(db, "tagDeps") |>
    select("tagID", "tagDeployID" = "deployID", "speciesID",
           "tsStartTag" = "tsStart", "tsEndTag" = "tsEnd", "test") |>
    mutate(tsEndTag = if_else(is.na(tsEndTag), max_ts, tsEndTag))

  # Get batches
  b <- tbl(db, "batchRuns") |>
    distinct()

  # Combine with runs
  tbl(db, "runs") |>
    rename("tagID" = "motusTagID") |>
    # Add in tags by tagID *and* overlap of start/end of tag deployment with the beginning of a run
    left_join(t, by = join_by(tagID, between(tsBegin, tsStartTag, tsEndTag))) |>
    slice_max(order_by = tsStartTag, by = "runID", with_ties = FALSE) |> # If multiple deps, take only the one with max start
    # Add in batchRuns by runID (to get the batchID)
    left_join(b, by = "runID") |>
    slice_max(order_by = batchID, by = "runID", with_ties = FALSE) |> # If multiple batches, take only the latest one
    # Add in batches by batchID (to get the deviceID)
    left_join(tbl(db, "batches") |> select("batchID", "motusDeviceID"), by = "batchID") |>
    # Add in receivers by deviceID *and* overlap of receiver deployment time with the beginning of a run
    left_join(r, by = join_by(motusDeviceID == deviceID, between(tsBegin, tsStartRecv, tsEndRecv))) |>
    slice_max(order_by = tsStartRecv, by = "runID", with_ties = FALSE) |> # If multiple deps, take only the one with max start
    rename("recvDeviceID" = "motusDeviceID") |>
    select(-"batchIDbegin") |>
    distinct()
}
# ---- custom_hits ----
custom_hits <- function(db) {
  # Replacement for NULL tsEnd values (i.e. today plus change)
  max_ts <- round(as.numeric(Sys.time()) + 1000)

  # Get Receivers
  r <- tbl(db, "recvDeps") |>
    select("deviceID", "recvDeployID" = "deployID",
           "tsStartRecv" = "tsStart", "tsEndRecv" = "tsEnd",
           "recvType" = "receiverType") |>
    mutate(tsEndRecv = if_else(is.na(tsEndRecv), max_ts, tsEndRecv))

  # Get tags
  t <- tbl(db, "tagDeps") |>
    select("tagID", "tagDeployID" = "deployID", "speciesID",
           "tsStartTag" = "tsStart", "tsEndTag" = "tsEnd") |>
    mutate(tsEndTag = if_else(is.na(tsEndTag), max_ts, tsEndTag))

  # Combine with the rest - And OMIT BAD filtered observations from previous step
  tbl(db, "hits") |>
    select(-"validated") |>
    # Add in runs and motusFilters
    left_join(tbl(db, "runs") |> select("runID", "motusFilter", "tagID" = "motusTagID"), by = "runID") |>
    # Add in tags by tagID *and* overlap of start/end of tag deployment with the beginning of a run
    left_join(t, by = join_by(tagID, between(ts, tsStartTag, tsEndTag))) |>
    # # Filter out "Bad runs" from previous step
    # left_join(tbl(db, "bad_data"), by = c("runID", "tagID")) |>
    # filter(is.na(BAD)) |>
    # select(-"BAD") |>
    # Add in batches by batchID (to get the deviceID)
    left_join(tbl(db, "batches") |> select("batchID", "motusDeviceID"), by = "batchID") |>
    # Add in receivers by deviceID *and* overlap of receiver deployment time with the beginning of a run
    left_join(r, by = join_by(motusDeviceID == deviceID, between(ts, tsStartRecv, tsEndRecv))) |>
    # Keep only relevant data
    select(-"batchID", -"tsStartRecv", -"tsEndRecv", -"tsStartTag", -"tsEndTag") |>
    rename("recvDeviceID" = "motusDeviceID")
}
# Open the saved per-project hit datasets (feather files)
load_hits <- function() {
  map(projects, \(x) {
    f <- filter(arws, proj_id == x) |>
      pull(file)
    open_dataset(f, format = "feather")
  })
}
# Combine map and coordinate-by-time plots for a single tag deployment
plot_bouts <- function(trans, trans_clean = NULL, bouts, tagDeployID, save = FALSE) {

  trans <- filter(trans, tagDeployID == .env$tagDeployID)
  if(is.null(trans_clean)) trans_clean <- trans
  trans_clean <- filter(trans_clean, tagDeployID == .env$tagDeployID)
  bouts <- filter(bouts, tagDeployID == .env$tagDeployID)

  g1 <- plot_map(trans)

  lon <- range(c(trans$lon1, trans$lon2)) |> diff()
  lat <- range(c(trans$lat1, trans$lat2)) |> diff()
  ratio <- abs(lat) / abs(lon)

  g2 <- plot_coord(trans, trans_clean, bouts, "lat")
  g3 <- plot_coord(trans, trans_clean, bouts, "lon")

  #if(ratio < 2) g_coord <- g2 + g3 else g_coord <- g2 / g3
  g_coord <- g2 / g3
  g_coord <- g_coord + plot_layout(guides = "collect")

  #if(ratio > 1) g <- g1 + g_coord else g <- g1 / g_coord
  g <- g1 + g_coord
  g <- g +
    plot_annotation(title = paste0("tagDeployID: ", tagDeployID, " (",
                                   as_date(min(bouts$timeBegin)), " to ",
                                   as_date(max(bouts$timeEnd)), ")"))
  if(save) {
    ggsave(plot = g, filename = paste0("check_", tagDeployID, ".pdf"), width = 20, height = 12)
  }
  g
}
# Map transitions between receivers for a tag deployment, labelling problem transitions
plot_map <- function(trans, tagDeployID = NULL) {

  if(!is.null(tagDeployID)) trans <- filter(trans, tagDeployID == .env[["tagDeployID"]])
  if(!"problem" %in% names(trans)) {
    trans$problem <- trans$problem_fast <- trans$problem_manual <- FALSE
    trans$resolved <- TRUE
  }

  recvs <- select(trans, stn1, lat1, lon1, stn2, lat2, lon2) |>
    pivot_longer(cols = c("stn1", "stn2", "lat1", "lat2", "lon1", "lon2"),
                 names_to = c(".value", "pair"),
                 names_pattern = "([a-z]{2,3})(\\d+)") |>
    select(-"pair") |>
    distinct()

  bb <- trans |>
    summarize(xmin = min(c(lon1, lon2)), xmax = max(c(lon1, lon2)),
              ymin = min(c(lat1, lat2)), ymax = max(c(lat1, lat2)))
  ry <- diff(range(bb[3:4]))
  rx <- diff(range(bb[1:2]))

  if(rx > 31 & any(!trans$problem)) {
    bb <- filter(trans, !problem) |>
      summarize(xmin = min(c(lon1, lon2)), xmax = max(c(lon1, lon2)),
                ymin = min(c(lat1, lat2)), ymax = max(c(lat1, lat2)))
    ry <- diff(range(bb[3:4]))
    rx <- diff(range(bb[1:2]))
  }

  if(rx < 0.5) {
    add <- (0.5 - rx)/2
    bb[1:2] <- bb[1:2] + (c(-1, 1) * add)
  }
  if(ry < 2) {
    add <- (2 - ry)/2
    bb[3:4] <- bb[3:4] + (c(-1, 1) * add)
  }

  ry <- diff(range(bb[3:4]))
  rx <- diff(range(bb[1:2]))
  ratio <- ry/rx

  if(ratio > 2) {
    rx1 <- ry/2
    add <- (rx1 - rx)/2
    bb[1:2] <- bb[1:2] + (c(-1, 1) * add)
  } else if (ratio < 2) {
    ry1 <- rx * 2
    add <- (ry1 - ry)/2
    bb[3:4] <- bb[3:4] + (c(-1, 1) * add)
  }

  if(any(trans$problem_fast | trans$problem_manual)) {
    labs <- trans |>
      filter(problem_fast | problem_manual) |>
      summarize(
        lon = lon1,
        lat = lat1,
        recv_pair = recv_pair[1],
        problem_chr = pmap_chr(list(problem_fast, problem_manual),
                               \(x, y) paste0(c("too fast", "manual")[c(x, y)], collapse = ", ")),
        nudge_x = sign(lon2 - lon1) * ((bb[[2]] - bb[[1]]) * 0.1),
        nudge_y = sign(lat1 - lat2) * ((bb[[4]] - bb[[3]]) * 0.05),
        .by = trans_id) |>
      select(-trans_id) |>
      distinct()
  } else labs <- data.frame()

  g <- ggplot(trans, aes(x = lon1, y = lat1)) +
    annotation_map_tile(type = "cartolight", zoomin = 0) +
    geom_spatial_segment(
      data = filter(trans, resolved, problem),
      aes(xend = lon2, yend = lat2), colour = "grey", linewidth = 2,
      arrow = arrow(length = unit(10, "pt"), type = "closed"), crs = 4326, na.rm = TRUE) +
    coord_sf(xlim = unlist(bb[1:2]), ylim = unlist(bb[3:4]), crs = 4326)

  if(nrow(labs) > 0) {
    g <- g + geom_spatial_label(data = labs,
                                aes(x = lon, y = lat, label = problem_chr),
                                nudge_x = labs$nudge_x, nudge_y = labs$nudge_y,
                                colour = "grey", crs = 4326)
  }

  g +
    geom_spatial_segment(
      data = filter(trans, (resolved & !problem) | (!resolved)),
      aes(xend = lon2, yend = lat2, colour = mid_date, linetype = problem),
      arrow = arrow(length = unit(10, "pt"), type = "closed"), crs = 4326, na.rm = TRUE) +
    geom_spatial_label_repel(data = recvs, aes(label = stn, x = lon, y = lat),
                             crs = 4326) +
    scale_size_date(range = c(6, 3)) +
    scale_linetype_manual(values = c("FALSE" = "solid", "TRUE" = "dotted"),
                          guide = "none") +
    scale_linewidth_manual(values = c("FALSE" = 0.75, "TRUE" = 1.55),
                           guide = "none") +
    scale_colour_viridis_c(option = "inferno", transform = "date",
                           begin = 0.20, end = 0.85) +
    guides(colour = guide_colourbar(reverse = TRUE))
}
# Plot detection bouts and transitions through time along one coordinate (lat or lon)
plot_coord <- function(trans, trans_clean, bouts, coord) {
  lim <- range(bouts$dateBegin)
  lim[2] <- lim[2] + difftime(lim[2], lim[1], units = "days") * 0.1
  lim <- as_datetime(lim)

  if(!"bad" %in% names(trans)) trans$bad <- FALSE
  if(!"bad" %in% names(trans_clean)) trans_clean$bad <- FALSE
  if(!"bad" %in% names(bouts)) bouts$bad <- FALSE
  if(!"problem" %in% names(trans)) trans$problem <- FALSE
  if(!"problem" %in% names(trans_clean)) trans_clean$problem <- FALSE

  bouts <- mutate(bouts, stn_group = factor(stn_group))

  recvs <- select(bouts, recvDeployLat, recvDeployLon, stn_group) |>
    distinct()

  pal <- viridis::magma(n = length(recvs$stn_group), begin = 0.10, end = 0.9) |>
    set_names(levels(recvs$stn_group))
  pal_txt <- farver::decode_colour(pal, "rgb", "hcl")
  pal_txt <- if_else(pal_txt[, "l"] > 50, "black", "white") |>
    set_names(levels(recvs$stn_group))

  c1 <- str_subset(names(bouts), regex(coord, ignore_case = TRUE))
  c2 <- paste0(coord, 1)
  c3 <- paste0(coord, 2)

  b <- filter(bouts, !bad)
  b_bad <- filter(bouts, bad)

  t <- trans_clean #filter(trans, !problem | (problem & !resolved))
  t_bad <- anti_join(trans, trans_clean, by = c("id1", "id2"))

  ggplot(b, aes(x = timeBegin, y = .data[[c1]])) +
    theme_bw() +
    # Good bouts (or unresolved)
    geom_segment(aes(xend = timeEnd, colour = stn_group), na.rm = TRUE, linewidth = 3) +
    geom_point(data = arrange(b, desc(stn_group)), aes(colour = stn_group), na.rm = TRUE, size = 2) +
    # Bad bouts (resolved)
    geom_segment(data = b_bad, aes(xend = timeEnd), colour = "grey", na.rm = TRUE, linewidth = 3) +
    geom_point(data = arrange(b_bad, desc(stn_group)), colour = "grey", na.rm = TRUE, size = 2) +
    # Good trans (or unresolved)
    geom_segment(data = t, aes(
      x = e1, xend = b2, y = .data[[c2]], yend = .data[[c3]], linetype = problem), na.rm = TRUE) +
    # Bad trans (resolved)
    geom_segment(data = t_bad, aes(
      x = e1, xend = b2, y = .data[[c2]], yend = .data[[c3]], linetype = problem),
      colour = "grey", na.rm = TRUE) +
    geom_label_repel(data = recvs, aes(label = stn_group, y = .data[[c1]], x = lim[2],
                                       fill = stn_group, colour = stn_group),
                     size = 2.5, direction = "y", point.size = NA, box.padding = 0, label.padding = 0.15,
                     colour = pal_txt[recvs$stn_group]) +
    scale_fill_manual(values = pal) +
    scale_colour_manual(values = pal) +
    scale_x_datetime(limits = lim)
}
# Plot hit signal strength around each problem transition for a tag deployment
plot_check <- function(hits, tagDeployID) {
  h <- filter(hits, tagDeployID == .env[["tagDeployID"]]) |>
    arrange(time)
  if(nrow(h) > 0) {
    sig_range <- function(limits) {
      diff <- abs(diff(limits))
      case_when(
        diff < 5 ~ limits + c(-7.5, 7.5),
        diff < 10 ~ limits + c(-5, 5),
        diff < 15 ~ limits + c(-2.5, 2.5),
        TRUE ~ limits)
    }

    g <- list()
    for(i in unique(h$trans_id)) {
      hh <- filter(h, trans_id == i) |>
        arrange(time) |>
        mutate(problem_pair = factor(problem_pair, levels = unique(problem_pair)))

      h_lab <- hh |>
        summarize(time = min(time), sig = median(sig),
                  .by = c("recvDeployID", "problem_pair")) |>
        mutate(recvDeployID = as.factor(recvDeployID))

      g1 <- ggplot(data = hh, aes(x = time, y = sig, colour = factor(recvDeployID),
                                  shape = ant)) +
        theme_bw() +
        theme(legend.position = "none", aspect.ratio = 1, plot.title = element_text(hjust = 0.5)) +
        geom_point(size = 1) +
        geom_line(stat = "smooth", alpha = 0.5, method = "gam", formula = y ~ s(x, k = 10, bs = "cs")) +
        geom_label(data = h_lab, aes(label = recvDeployID, shape = NULL), size = 3, hjust = -0.2) +
        scale_colour_viridis_d(option = "magma", end = 0.8) +
        scale_y_continuous(limits = sig_range) +
        scale_x_datetime(date_labels = "%H:%M") +
        facet_wrap(~ problem_pair, scales = "free", axes = "all") +
        labs(title = paste0("Transition ", i))

      g[[length(g) + 1]] <- g1
    }

    if(length(g) > 1) ncol <- 2 else ncol <- 1

    g <- wrap_plots(g, ncol = ncol) +
      plot_layout(heights = 1) +
      plot_annotation(title = paste0("tagDeployID: ", tagDeployID)) &
      theme(plot.margin = unit(c(1,0,1,1), units = "pt"))
  } else g <- NULL
  g
}
# Create bouts from runs by adding them together and then resorting.
# The resorting helps to get at bouts that overlap each other.
create_bouts <- function(x, cutoff) {
  x1 <- x |>
    mutate(
      lag1 = as_units(difftime(lead(timeBegin), timeEnd, units = "min")),
      lag2 = as_units(difftime(lead(timeEnd), timeEnd, units = "min")),
      lag = pmin(lag1, lag2),
      prev_lag = lag(lag, default = set_units(Inf, "min")),
      start = prev_lag >= .env$cutoff,
      bout_id = cumsum(start))

  x1 |>
    group_by(bout_id) |>
    summarize(timeBegin = min(timeBegin), timeEnd = max(timeEnd),
              runID = list(unlist(runID)),
              n_runs = n(),
              len = list(unlist(len)),
              ant = list(unlist(ant)),
              total_time = sum(difftime(timeEnd, timeBegin, units = "min")),
              .groups = "drop")
}
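An invented, minimal illustration of how create_bouts() groups runs, using the bout_cutoff defined above (data made up for the example):

demo_runs <- tibble(
  runID = 1:3,
  timeBegin = as_datetime(c("2024-05-01 10:00:00", "2024-05-01 10:20:00", "2024-05-01 12:00:00")),
  timeEnd   = as_datetime(c("2024-05-01 10:05:00", "2024-05-01 10:25:00", "2024-05-01 12:10:00")),
  len = c(5, 4, 6),
  ant = c("1", "1", "2"))

create_bouts(demo_runs, cutoff = bout_cutoff)
# Runs 1 and 2 are separated by less than 30 min, so they merge into one bout;
# run 3 starts a second bout.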
# Identify transitions between different station groups within a tag deployment
calc_trans <- function(x) {
  x |>
    rename(id1 = id) |>
    mutate(next_stn = lead(stn_group),
           id2 = lead(id1), .by = "tagDeployID") |>
    filter(stn_group != next_stn) |>
    select(tagDeployID, id1, id2)
}
# ---- find_overlaps ----
# Check whether bouts overlap in time; return TRUE/FALSE or a data frame of overlapping pairs
find_overlaps <- function(x2, type = "logical") {
  #if(3551 %in% x2$stn_group) browser()

  # comp B/E2 vs. B/E1
  m1 <- outer(x2$timeBegin, x2$timeBegin, '>=')
  m2 <- outer(x2$timeEnd, x2$timeEnd, '<=')

  m3 <- outer(x2$timeBegin, x2$timeEnd, '<=')
  m4 <- outer(x2$timeEnd, x2$timeBegin, '>=')

  ovlps <- (m1 & m3) | (m2 & m4)
  ovlps <- ovlps | t(ovlps) # either id1/id2 or id2/id1

  if(type == "df") {
    nms <- paste0(x2$stn_group, "_", x2$bout_id)
    dimnames(ovlps) <- list(nms, nms)
    r <- expand_grid(id1 = nms, id2 = nms) |>
      mutate(overlap = as.vector(ovlps)) |>
      filter(overlap, id1 != id2)
  } else if(type == "logical") {
    r <- any(colSums(ovlps) > 1)
  }
  r
}
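A small, invented example of what find_overlaps() reports; the bouts at station groups A and B overlap in time, while C does not:

demo_bouts <- tibble(
  stn_group = c("A", "B", "C"),
  bout_id = 1,
  timeBegin = as_datetime(c("2024-05-01 10:00:00", "2024-05-01 10:10:00", "2024-05-01 14:00:00")),
  timeEnd   = as_datetime(c("2024-05-01 10:30:00", "2024-05-01 10:20:00", "2024-05-01 14:30:00")))

find_overlaps(demo_bouts)        # TRUE: at least one pair of bouts overlaps
find_overlaps(demo_bouts, "df")  # Data frame pairing A_1 with B_1 (in both directions)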
# ---- load_runs ----
# Load the saved runs dataset, omitting runs previously flagged as noise
load_runs <- function() {
  noise_runs <- open_dataset("Data/02_Datasets/noise_runs.feather", format = "feather")
  noise_hits <- open_dataset("Data/02_Datasets/noise_hits.feather", format = "feather")

  open_dataset("Data/02_Datasets/runs", format = "feather") |>
    anti_join(noise_runs, by = c("runID")) |>
    anti_join(noise_hits, by = c("runID")) |>
    filter(len > 2) |>
    select(runID, timeBegin, timeEnd, tagID, ant, len, motusFilter, tagDeployID,
           speciesID, starts_with("recvDeploy"), matches("date|month|year")) |>
    arrange(timeBegin) |>
    collect()
}
# ---- create_overlapping ----
# Find pairs of bouts that overlap in time, for each tag deployment
create_overlapping <- function(bouts) {
  nest(bouts, data = -tagDeployID) |>
    mutate(overlaps = map(data, \(x) find_overlaps(x, "df"), .progress = interactive())) |>
    select(-"data") |>
    unnest("overlaps") |>
    mutate(id1 = paste0(tagDeployID, "_", id1),
           id2 = paste0(tagDeployID, "_", id2))
}
# ---- create_trans ----
create_trans <- function(bouts, overlapping_bouts, dist) {

  # Get transitions between stations
  trans <- bind_rows(
    bouts |>
      arrange(timeBegin) |>
      calc_trans(),
    bouts |>
      arrange(timeEnd) |>
      calc_trans()
  ) |>
    distinct() |>
    # Add in any missing transitions with overlapping bouts
    full_join(overlapping_bouts, by = c("tagDeployID", "id1", "id2")) |>
    # Add in data related to the first station
    left_join(select(bouts, "stn1" = "stn_group", "id1" = "id",
                     "b1" = "timeBegin", "e1" = "timeEnd",
                     "recv1" = "recvDeployID", "lat1" = "recvDeployLat", "lon1" = "recvDeployLon"),
              by = "id1") |>
    # Add in data related to the second station
    left_join(select(bouts, "stn2" = "stn_group", "id2" = "id",
                     "b2" = "timeBegin", "e2" = "timeEnd",
                     "recv2" = "recvDeployID", "lat2" = "recvDeployLat", "lon2" = "recvDeployLon"),
              by = "id2") |>
    # Identify transition pairs
    mutate(recv_pair = paste0(recv1, "_", recv2)) |>
    # Add in distance between stations
    left_join(select(dist, "recv_pair", "next_dist", "min_time"),
              by = "recv_pair") |>
    # Calculate the time and speed taken to move between stations
    mutate(trans_id = row_number(),
           overlap = replace_na(overlap, FALSE),
           lag1 = as_units(difftime(b2, e1, units = "min")),
           lag2 = as_units(difftime(e2, e1, units = "min")),
           time_diff = pmin(lag1, lag2),
           time_diff = set_units(time_diff, "s"),
           speed = set_units(next_dist, "m") / time_diff,
           time_diff = as.duration(as.numeric(time_diff)),
           time_diff = if_else(overlap, duration(0), time_diff),
           # Mid-point date
           mid_date = as_date(e1 + (b2 - e1)/2))

  if(nrow(trans) > 1) {
    trans <- trans |>
      # Get rid of duplicate transitions when have overlapping pairs
      mutate(id_pair = map2_chr(id1, id2, \(x, y) paste0(sort(c(x, y)), collapse = " - "))) |>
      arrange(b1) |>
      slice(1, .by = "id_pair") |>
      select(-"id_pair") |>
      # Seasonal details
      mutate(migration = case_when(year(e1) != year(b2) ~ NA,
                                   month(e1) %in% 8:12 & month(b2) %in% 8:12 ~ "south",
                                   month(e1) %in% 2:7 & month(b2) %in% 2:7 ~ "north",
                                   TRUE ~ NA),
             connected = speed > set_units(5, "m/s"))
  } else trans <- mutate(trans, migration = NA_character_, connected = NA)

  trans
}
# ---- id_problems ----
id_problems <- function(trans, problems_manual = NULL) {

  # Identify suspiciously short bouts related to transitions
  #b <- filter(bouts, n_runs < 5, len_max < 10) |>
  #  pull(id)

  # Add manual problems
  if(!is.null(problems_manual)) {
    trans_fix1 <- semi_join(trans, rename(problems_manual, "stn1" = "stn_group"),
                            by = join_by("tagDeployID", "stn1", within("b1", "e1", "start", "end"))) |>
      mutate(problem_manual = TRUE)
    trans_fix2 <- semi_join(trans, rename(problems_manual, "stn2" = "stn_group"),
                            by = join_by("tagDeployID", "stn2", within("b2", "e2", "start", "end"))) |>
      mutate(problem_manual = TRUE)
    trans <- trans |>
      mutate(problem_manual = FALSE) |>
      rows_update(trans_fix1, by = c("tagDeployID", "stn1", "trans_id")) |>
      rows_update(trans_fix2, by = c("tagDeployID", "stn2", "trans_id"))
  } else trans$problem_manual <- FALSE

  trans |>
    mutate(
      # Identify problematic transitions (overlaps or too fast)
      problem_fast = next_dist >= max_tower_dist & (time_diff < min_time | overlap),
      # TODO: Results in 300+ ids to check... leave for now
      problem_short_bout = FALSE, # TODO: if using, replace with id1 %in% b | id2 %in% b, (add bouts in as arg)
      # Direction
      lat_diff = lat2 - lat1,
      problem_direction = (migration == "south" & lat_diff > 1) |
        (migration == "north" & lat_diff < -1),
      problem_direction = replace_na(problem_direction, FALSE),
      # Overall
      problem = problem_fast | problem_short_bout | problem_manual | problem_direction) |>
    select(-matches("\\.(x|y)$"))
}
# ---- resolve_stns ----
resolve_stns <- function(bouts, trans, problems_manual = NULL) {

  b <- summarize(
    bouts,
    len_max = max(len_max),
    #n_bouts = n(),
    #n_runs = sum(n_runs),
    total_time = sum(total_time),
    .by = c("tagDeployID", "stn_group"))

  t <- filter(trans, problem) |>
    select(tagDeployID, trans_id, stn1, stn2, id1, id2, b1, e1, b2, e2, lat_diff, contains("problem")) |>
    pivot_longer(cols = c("stn1", "stn2", "id1", "id2", "b1", "e1", "b2", "e2"),
                 names_to = c(".value", "pair"),
                 names_pattern = "([a-z]{1,3})(\\d+)") |>
    rename("stn_group" = "stn") |>
    left_join(b, by = c("tagDeployID", "stn_group")) |>
    arrange(tagDeployID, trans_id, total_time) |>
    select(tagDeployID, trans_id, contains("problem"), stn_group, id, b, e,
           len_max, total_time)

  # Mark the station to omit within a group that was able to have a (resolved) station
  # Mark manually identified problems
  if(!is.null(problems_manual)) {
    t <- left_join(t, select(problems_manual, "tagDeployID", "stn_group", "start", "end", "omit" = "problem_manual"),
                   by = join_by("tagDeployID", "stn_group", within("b", "e", "start", "end"))) |>
      select(-"b", -"e", -"start", -"end")
  } else t$omit <- NA

  t |>
    mutate(
      # Mark directional and short bout problems as FALSE (need to be manually checked)
      omit = if_else(is.na(omit) & (problem_direction | problem_short_bout), FALSE, NA),
      # Choose station to omit otherwise
      omit = if_else(is.na(omit),
                     total_time <= (suppressWarnings(max(total_time)) / 10) |
                       len_max <= (suppressWarnings(max(len_max)) / 10),
                     omit),
      .by = c("tagDeployID", "trans_id")) |>
    mutate(resolved = any(omit), .by = c("tagDeployID", "trans_id"))
}
#' Convert unit vectors to regular numeric vectors
#'
#' @param x Vector (or column)
#' @param units Units to ensure they match
#'
flatten_units <- function(x, units) {
  if(inherits(x, "difftime")) x <- as_units(x)
  set_units(x, units, mode = "standard") |>
    as.numeric()
}
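For example (values chosen purely for illustration):

flatten_units(set_units(5, "km"), "m")                 # 5000
flatten_units(as.difftime(90, units = "secs"), "min")  # 1.5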