library(tidyverse)
library(sf)
library(units)
library(USAboundaries)
library(gghighlight)
library(ggrepel)
library(rmapshaper)
library(readxl)

source("../R/utils.R")

Question 1

This lab begins by preparing five tesselated surfaces from CONUS data to write and plot them.

Original CONUS data
conus = USAboundaries::us_counties(resolution = "low") %>%
  filter(!state_name %in% c("Alaska", "Hawaii", "Puerto Rico")) %>%
  st_transform(5070) %>%
  st_centroid() %>%
  st_combine()
Simple feature CONUS data used for mapping
sf_conus = USAboundaries::us_counties(resolution = "low") %>%
  filter(!state_name %in% c("Alaska", "Hawaii", "Puerto Rico")) %>%
  st_transform(5070) %>%
  mutate(id = 1:n())
Simplified CONUS data
usa = USAboundaries::us_counties(resolution = "low") %>%
  filter(!state_name %in% c("Alaska", "Hawaii", "Puerto Rico")) %>%
  st_transform(5070) %>%
  st_union() %>%
  ms_simplify(keep = 0.1)
By simplifying the CONUS data, we reduce the amount of points from 3229 to 322, which significantly reduces the amount of time the computer takes to run code.
mapview::npts(usa)
## [1] 322
Voroni data
v_conus = st_voronoi(conus) %>%
  st_sf() %>%
  st_cast() %>%
  mutate(id = 1:n()) %>%
  st_intersection(usa)
Delauny tesselation data
t_conus = st_triangulate(conus) %>%
  st_sf() %>%
  st_cast() %>%
  mutate(id = 1:n()) %>%
  st_intersection(usa)
Square grid data
gs_conus = st_make_grid(usa, n = 70, square = TRUE) %>%
  st_sf() %>%
  st_cast() %>%
  mutate(id = 1:n())
Hexagonal grid data
gh_conus = st_make_grid(usa, n = 70, square = FALSE) %>%
  st_sf() %>%
  st_cast() %>%
  mutate(id = 1:n())

A function was created to easily plot our 5 maps so there’s no redundant copy and pasting. Then the 5 plots are created at the bottom using the function.

county_plot = function(plot_data, plot_title) {
  feat = plot_data %>%
    as_tibble() %>%
    select(id) %>%
    summarise(max(id))

  ggplot() +
    geom_sf(data = plot_data, color = "navy", size = 0.2) +
    aes(fillColor = "white") +
    labs(title = plot_title,
         caption = paste("Number of Features", feat)) +
    theme_void()
}

county_plot(sf_conus, "US Counties")

county_plot(v_conus, "Voroni Tesselation")

county_plot(t_conus, "Delauny Triangulation Tesselation")

county_plot(gs_conus, "Gridded Coverage")

county_plot(gh_conus, "Hexagonal Coverage")

Question 2

A function was created to summarize the tessellated surfaces.

sum_tess = function(sf, desc){
  area = st_area(sf) %>%
      units::set_units("km^2") %>%
      units::drop_units()
    
    df = data.frame(description = desc,
                    count = nrow(sf),
                    mean_area = mean(area),
                    std_area = sd(area),
                    tot_area = sum(area))
    return(df)
}

sum_tess(sf_conus, "County Summary")
##      description count mean_area std_area tot_area
## 1 County Summary  3108  2521.745 3404.325  7837583
sum_tess(v_conus, "Voroni Summary")
##      description count mean_area std_area tot_area
## 1 Voroni Summary  3107  2522.865 2885.827  7838541
sum_tess(t_conus, "Triangulate Summary")
##           description count mean_area std_area tot_area
## 1 Triangulate Summary  6196  1252.506  1576.11  7760528
sum_tess(gs_conus, "Gridded Summary")
##       description count mean_area     std_area tot_area
## 1 Gridded Summary  3129  2687.981 1.118976e-11  8410693
sum_tess(gh_conus, "Hexagonal Summary")
##         description count mean_area     std_area tot_area
## 1 Hexagonal Summary  2256  3763.052 1.608738e-11  8489445
Here the 5 summaries around bound together into one data frame, then printed as a table using knitr.
tess_summary = bind_rows(
  sum_tess(sf_conus, "County Summary"),
  sum_tess(v_conus, "Voroni Summary"),
  sum_tess(t_conus, "Triangulate Summary"),
  sum_tess(gs_conus, "Gridded Summary"),
  sum_tess(gh_conus, "Hexagonal Summary"))

knitr::kable(tess_summary,
             caption = "Tessellation Summary",
             col.names = c("Description", "Count", "Mean Area", "Standard Deviation", "Total Area"))
Tessellation Summary
Description Count Mean Area Standard Deviation Total Area
County Summary 3108 2521.745 3404.325 7837583
Voroni Summary 3107 2522.865 2885.827 7838541
Triangulate Summary 6196 1252.506 1576.110 7760528
Gridded Summary 3129 2687.981 0.000 8410693
Hexagonal Summary 2256 3763.052 0.000 8489445

Question 3

A point-in-polygon was created

dams <- read_excel("../data/NID2019_U.xlsx") %>%
  filter(!is.na(LONGITUDE) & !is.na(LATITUDE)) %>%
  st_as_sf(coords = c("LONGITUDE", "LATITUDE"), crs = 4326) %>%
  st_transform(5070)

point_in_polygon = function(points, polygon, c_name){
  st_join(polygon, points) %>%
    st_drop_geometry() %>%
    count(get(c_name)) %>%
    rename(c("id" = "get(c_name)")) %>%
    left_join(polygon) %>%
    st_as_sf()
}
County
point_in_polygon(dams, sf_conus, "id")
Voroni
point_in_polygon(dams, v_conus, "id")
Triangulation
point_in_polygon(dams, t_conus, "id")
Square Grid
point_in_polygon(dams, gs_conus, "id")
Hexagonal Grid
point_in_polygon(dams, gh_conus, "id")
The most obvious trait out of these five tessellations is that the square and hexagonal grids have standard deviations of zero because they split the US into equal area tiles. It’s also noticable that the traigulation tessellation has the most amount of points, which ultimately results in it having the smallest mean and standard deviation. This also means triangulation tessellation takes the longest to compute.

A function to map the point-in-polygon function was created

pip_plot = function(plot_data, plot_title){
ggplot() +
  geom_sf(data = plot_data, aes(fill = log(n)), size = .2, col = NA) +
  scale_fill_viridis_c() +
  labs(title = plot_title,
       caption = paste(sum(plot_data$n))) +
  theme_void()
}

pip_plot(point_in_polygon(dams, sf_conus, "id"), "Number of Dams in US by County")

pip_plot(point_in_polygon(dams, v_conus, "id"), "Number of Dams in US by County Voroni")

pip_plot(point_in_polygon(dams, t_conus, "id"), "Number of Dams in US by County Triangulation")

pip_plot(point_in_polygon(dams, gs_conus, "id"), "Number of Dams in US by County Square Grid")

pip_plot(point_in_polygon(dams, gh_conus, "id"), "Number of Dams in US by County Hexagonal Grid")

##### Different tessellations create maps that present data very differently. Voroni and triangulation tessellations maintain county sizes more than the square or hexagonal grids, which obviously influences how many points are in each tile. While square or hexagonal tiles have equal area which may be useful in some cases, I chose to continue with the voroni tessellation because of how it better maintains the sizes of the counties, but doesn’t have as many points as the triangulation tessellation.

Question 4

A bar chart was created showing all the types of dams used in the US.

nid_classifier <- data.frame(
  abbr=c("I", "H", "C", "N", "S", "R", "P", "F", "D", "T", "O"),
  purpose=c("Irrigation", "Hydroelectric", "Flood Control", "Navigation", "Water Supply", "Recreation", "Fire Protection", "Fish and Wildlife", "Debris Control", "Tailings", "Other"))

dam_freq <- strsplit(dams$PURPOSES, split = "") %>%
  unlist() %>% 
  table() %>% 
  as.data.frame() %>% 
  setNames(c("abbr", "count")) %>% 
  left_join(nid_classifier) %>% 
  mutate(lab = paste0(purpose, "\n(", abbr, ")"))

dam_freq %>% 
  ggplot(aes(x = lab, y = count)) +
           geom_bar(stat = "identity") +
  coord_flip()

Plots were created for hydroelectric, fish and wildlife, irrigation, and fire protection dams. I chose these four because they were the most interesting to me and I was curious to see if the placement of the different types of dams would match my assumtions of where I thought they should be.

hydro = dams %>% 
  filter(grepl("H", PURPOSES))

fish = dams %>% 
  filter(grepl("F", PURPOSES))

irrigation = dams %>% 
  filter(grepl("I", PURPOSES))

fire = dams %>% 
  filter(grepl("P", PURPOSES))


pip_plot(point_in_polygon(hydro, v_conus, "id"), "Hydro Dams in US") + 
  gghighlight(n > (mean(n) + sd(n)))

pip_plot(point_in_polygon(fish, v_conus, "id"), "Fish and Wildlife Dams in US") + 
  gghighlight(n > (mean(n) + sd(n)))

pip_plot(point_in_polygon(irrigation, v_conus, "id"), "Irrigation Dams in US") + 
  gghighlight(n > (mean(n) + sd(n)))

pip_plot(point_in_polygon(fire, v_conus, "id"), "Fire Protection Dams in US") + 
  gghighlight(n > (mean(n) + sd(n)))