library(tidyverse)
library(sf)
library(units)
library(USAboundaries)
library(gghighlight)
library(ggrepel)
library(rmapshaper)
library(readxl)
source("../R/utils.R")
conus = USAboundaries::us_counties(resolution = "low") %>%
filter(!state_name %in% c("Alaska", "Hawaii", "Puerto Rico")) %>%
st_transform(5070) %>%
st_centroid() %>%
st_combine()
sf_conus = USAboundaries::us_counties(resolution = "low") %>%
filter(!state_name %in% c("Alaska", "Hawaii", "Puerto Rico")) %>%
st_transform(5070) %>%
mutate(id = 1:n())
usa = USAboundaries::us_counties(resolution = "low") %>%
filter(!state_name %in% c("Alaska", "Hawaii", "Puerto Rico")) %>%
st_transform(5070) %>%
st_union() %>%
ms_simplify(keep = 0.1)
mapview::npts(usa)
## [1] 322
v_conus = st_voronoi(conus) %>%
st_sf() %>%
st_cast() %>%
mutate(id = 1:n()) %>%
st_intersection(usa)
t_conus = st_triangulate(conus) %>%
st_sf() %>%
st_cast() %>%
mutate(id = 1:n()) %>%
st_intersection(usa)
gs_conus = st_make_grid(usa, n = 70, square = TRUE) %>%
st_sf() %>%
st_cast() %>%
mutate(id = 1:n())
gh_conus = st_make_grid(usa, n = 70, square = FALSE) %>%
st_sf() %>%
st_cast() %>%
mutate(id = 1:n())
county_plot = function(plot_data, plot_title) {
feat = plot_data %>%
as_tibble() %>%
select(id) %>%
summarise(max(id))
ggplot() +
geom_sf(data = plot_data, color = "navy", size = 0.2) +
aes(fillColor = "white") +
labs(title = plot_title,
caption = paste("Number of Features", feat)) +
theme_void()
}
county_plot(sf_conus, "US Counties")
county_plot(v_conus, "Voroni Tesselation")
county_plot(t_conus, "Delauny Triangulation Tesselation")
county_plot(gs_conus, "Gridded Coverage")
county_plot(gh_conus, "Hexagonal Coverage")
sum_tess = function(sf, desc){
area = st_area(sf) %>%
units::set_units("km^2") %>%
units::drop_units()
df = data.frame(description = desc,
count = nrow(sf),
mean_area = mean(area),
std_area = sd(area),
tot_area = sum(area))
return(df)
}
sum_tess(sf_conus, "County Summary")
## description count mean_area std_area tot_area
## 1 County Summary 3108 2521.745 3404.325 7837583
sum_tess(v_conus, "Voroni Summary")
## description count mean_area std_area tot_area
## 1 Voroni Summary 3107 2522.865 2885.827 7838541
sum_tess(t_conus, "Triangulate Summary")
## description count mean_area std_area tot_area
## 1 Triangulate Summary 6196 1252.506 1576.11 7760528
sum_tess(gs_conus, "Gridded Summary")
## description count mean_area std_area tot_area
## 1 Gridded Summary 3129 2687.981 1.118976e-11 8410693
sum_tess(gh_conus, "Hexagonal Summary")
## description count mean_area std_area tot_area
## 1 Hexagonal Summary 2256 3763.052 1.608738e-11 8489445
tess_summary = bind_rows(
sum_tess(sf_conus, "County Summary"),
sum_tess(v_conus, "Voroni Summary"),
sum_tess(t_conus, "Triangulate Summary"),
sum_tess(gs_conus, "Gridded Summary"),
sum_tess(gh_conus, "Hexagonal Summary"))
knitr::kable(tess_summary,
caption = "Tessellation Summary",
col.names = c("Description", "Count", "Mean Area", "Standard Deviation", "Total Area"))
Description | Count | Mean Area | Standard Deviation | Total Area |
---|---|---|---|---|
County Summary | 3108 | 2521.745 | 3404.325 | 7837583 |
Voroni Summary | 3107 | 2522.865 | 2885.827 | 7838541 |
Triangulate Summary | 6196 | 1252.506 | 1576.110 | 7760528 |
Gridded Summary | 3129 | 2687.981 | 0.000 | 8410693 |
Hexagonal Summary | 2256 | 3763.052 | 0.000 | 8489445 |
dams <- read_excel("../data/NID2019_U.xlsx") %>%
filter(!is.na(LONGITUDE) & !is.na(LATITUDE)) %>%
st_as_sf(coords = c("LONGITUDE", "LATITUDE"), crs = 4326) %>%
st_transform(5070)
point_in_polygon = function(points, polygon, c_name){
st_join(polygon, points) %>%
st_drop_geometry() %>%
count(get(c_name)) %>%
rename(c("id" = "get(c_name)")) %>%
left_join(polygon) %>%
st_as_sf()
}
point_in_polygon(dams, sf_conus, "id")
point_in_polygon(dams, v_conus, "id")
point_in_polygon(dams, t_conus, "id")
point_in_polygon(dams, gs_conus, "id")
point_in_polygon(dams, gh_conus, "id")
pip_plot = function(plot_data, plot_title){
ggplot() +
geom_sf(data = plot_data, aes(fill = log(n)), size = .2, col = NA) +
scale_fill_viridis_c() +
labs(title = plot_title,
caption = paste(sum(plot_data$n))) +
theme_void()
}
pip_plot(point_in_polygon(dams, sf_conus, "id"), "Number of Dams in US by County")
pip_plot(point_in_polygon(dams, v_conus, "id"), "Number of Dams in US by County Voroni")
pip_plot(point_in_polygon(dams, t_conus, "id"), "Number of Dams in US by County Triangulation")
pip_plot(point_in_polygon(dams, gs_conus, "id"), "Number of Dams in US by County Square Grid")
pip_plot(point_in_polygon(dams, gh_conus, "id"), "Number of Dams in US by County Hexagonal Grid")
##### Different tessellations create maps that present data very differently. Voroni and triangulation tessellations maintain county sizes more than the square or hexagonal grids, which obviously influences how many points are in each tile. While square or hexagonal tiles have equal area which may be useful in some cases, I chose to continue with the voroni tessellation because of how it better maintains the sizes of the counties, but doesn’t have as many points as the triangulation tessellation.
nid_classifier <- data.frame(
abbr=c("I", "H", "C", "N", "S", "R", "P", "F", "D", "T", "O"),
purpose=c("Irrigation", "Hydroelectric", "Flood Control", "Navigation", "Water Supply", "Recreation", "Fire Protection", "Fish and Wildlife", "Debris Control", "Tailings", "Other"))
dam_freq <- strsplit(dams$PURPOSES, split = "") %>%
unlist() %>%
table() %>%
as.data.frame() %>%
setNames(c("abbr", "count")) %>%
left_join(nid_classifier) %>%
mutate(lab = paste0(purpose, "\n(", abbr, ")"))
dam_freq %>%
ggplot(aes(x = lab, y = count)) +
geom_bar(stat = "identity") +
coord_flip()
hydro = dams %>%
filter(grepl("H", PURPOSES))
fish = dams %>%
filter(grepl("F", PURPOSES))
irrigation = dams %>%
filter(grepl("I", PURPOSES))
fire = dams %>%
filter(grepl("P", PURPOSES))
pip_plot(point_in_polygon(hydro, v_conus, "id"), "Hydro Dams in US") +
gghighlight(n > (mean(n) + sd(n)))
pip_plot(point_in_polygon(fish, v_conus, "id"), "Fish and Wildlife Dams in US") +
gghighlight(n > (mean(n) + sd(n)))
pip_plot(point_in_polygon(irrigation, v_conus, "id"), "Irrigation Dams in US") +
gghighlight(n > (mean(n) + sd(n)))
pip_plot(point_in_polygon(fire, v_conus, "id"), "Fire Protection Dams in US") +
gghighlight(n > (mean(n) + sd(n)))