Geography 176A

library(tidyverse)
library(sf)
library(units)
library(USAboundaries)
library(gghighlight)
library(ggrepel)
library(rmapshaper)
library(readxl)

source("../R/utils.R")

Question 1

This lab begins by preparing five tesselated surfaces from CONUS data to write and plot them.

Original CONUS data

conus = USAboundaries::us_counties(resolution = "low") %>%
  filter(!state_name %in% c("Alaska", "Hawaii", "Puerto Rico")) %>%
  st_transform(5070) %>%
  st_centroid() %>%
  st_combine()

Simple feature CONUS data used for mapping

sf_conus = USAboundaries::us_counties(resolution = "low") %>%
  filter(!state_name %in% c("Alaska", "Hawaii", "Puerto Rico")) %>%
  st_transform(5070) %>%
  mutate(id = 1:n())

Simplified CONUS data

usa = USAboundaries::us_counties(resolution = "low") %>%
  filter(!state_name %in% c("Alaska", "Hawaii", "Puerto Rico")) %>%
  st_transform(5070) %>%
  st_union() %>%
  ms_simplify(keep = 0.1)

By simplifying the CONUS data, we reduce the amount of points from 3229 to 322, which significantly reduces the amount of time the computer takes to run code.

mapview::npts(usa)

## [1] 322

Voroni data

v_conus = st_voronoi(conus) %>%
  st_sf() %>%
  st_cast() %>%
  mutate(id = 1:n()) %>%
  st_intersection(usa)

Delauny tesselation data

t_conus = st_triangulate(conus) %>%
  st_sf() %>%
  st_cast() %>%
  mutate(id = 1:n()) %>%
  st_intersection(usa)

Square grid data

gs_conus = st_make_grid(usa, n = 70, square = TRUE) %>%
  st_sf() %>%
  st_cast() %>%
  mutate(id = 1:n())

Hexagonal grid data

gh_conus = st_make_grid(usa, n = 70, square = FALSE) %>%
  st_sf() %>%
  st_cast() %>%
  mutate(id = 1:n())

A function was created to easily plot our 5 maps so there’s no redundant copy and pasting. Then the 5 plots are created at the bottom using the function.

county_plot = function(plot_data, plot_title) {
  feat = plot_data %>%
    as_tibble() %>%
    select(id) %>%
    summarise(max(id))

  ggplot() +
    geom_sf(data = plot_data, color = "navy", size = 0.2) +
    aes(fillColor = "white") +
    labs(title = plot_title,
         caption = paste("Number of Features", feat)) +
    theme_void()
}

county_plot(sf_conus, "US Counties")

county_plot(v_conus, "Voroni Tesselation")

county_plot(t_conus, "Delauny Triangulation Tesselation")

county_plot(gs_conus, "Gridded Coverage")

county_plot(gh_conus, "Hexagonal Coverage")

Question 2

A function was created to summarize the tessellated surfaces.

sum_tess = function(sf, desc){
  area = st_area(sf) %>%
      units::set_units("km^2") %>%
      units::drop_units()
    
    df = data.frame(description = desc,
                    count = nrow(sf),
                    mean_area = mean(area),
                    std_area = sd(area),
                    tot_area = sum(area))
    return(df)
}

sum_tess(sf_conus, "County Summary")

##      description count mean_area std_area tot_area
## 1 County Summary  3108  2521.745 3404.325  7837583

sum_tess(v_conus, "Voroni Summary")

##      description count mean_area std_area tot_area
## 1 Voroni Summary  3107  2522.865 2885.827  7838541

sum_tess(t_conus, "Triangulate Summary")

##           description count mean_area std_area tot_area
## 1 Triangulate Summary  6196  1252.506  1576.11  7760528

sum_tess(gs_conus, "Gridded Summary")

##       description count mean_area     std_area tot_area
## 1 Gridded Summary  3129  2687.981 1.118976e-11  8410693

sum_tess(gh_conus, "Hexagonal Summary")

##         description count mean_area     std_area tot_area
## 1 Hexagonal Summary  2256  3763.052 1.608738e-11  8489445

Here the 5 summaries around bound together into one data frame, then printed as a table using knitr.

tess_summary = bind_rows(
  sum_tess(sf_conus, "County Summary"),
  sum_tess(v_conus, "Voroni Summary"),
  sum_tess(t_conus, "Triangulate Summary"),
  sum_tess(gs_conus, "Gridded Summary"),
  sum_tess(gh_conus, "Hexagonal Summary"))

knitr::kable(tess_summary,
             caption = "Tessellation Summary",
             col.names = c("Description", "Count", "Mean Area", "Standard Deviation", "Total Area"))

Tessellation Summary
Description	Count	Mean Area	Standard Deviation	Total Area
County Summary	3108	2521.745	3404.325	7837583
Voroni Summary	3107	2522.865	2885.827	7838541
Triangulate Summary	6196	1252.506	1576.110	7760528
Gridded Summary	3129	2687.981	0.000	8410693
Hexagonal Summary	2256	3763.052	0.000	8489445

Question 3

A point-in-polygon was created

dams <- read_excel("../data/NID2019_U.xlsx") %>%
  filter(!is.na(LONGITUDE) & !is.na(LATITUDE)) %>%
  st_as_sf(coords = c("LONGITUDE", "LATITUDE"), crs = 4326) %>%
  st_transform(5070)

point_in_polygon = function(points, polygon, c_name){
  st_join(polygon, points) %>%
    st_drop_geometry() %>%
    count(get(c_name)) %>%
    rename(c("id" = "get(c_name)")) %>%
    left_join(polygon) %>%
    st_as_sf()
}

County

point_in_polygon(dams, sf_conus, "id")

Voroni

point_in_polygon(dams, v_conus, "id")

Triangulation

point_in_polygon(dams, t_conus, "id")

Square Grid

point_in_polygon(dams, gs_conus, "id")

Hexagonal Grid

point_in_polygon(dams, gh_conus, "id")

The most obvious trait out of these five tessellations is that the square and hexagonal grids have standard deviations of zero because they split the US into equal area tiles. It’s also noticable that the traigulation tessellation has the most amount of points, which ultimately results in it having the smallest mean and standard deviation. This also means triangulation tessellation takes the longest to compute.

A function to map the point-in-polygon function was created

pip_plot = function(plot_data, plot_title){
ggplot() +
  geom_sf(data = plot_data, aes(fill = log(n)), size = .2, col = NA) +
  scale_fill_viridis_c() +
  labs(title = plot_title,
       caption = paste(sum(plot_data$n))) +
  theme_void()
}

pip_plot(point_in_polygon(dams, sf_conus, "id"), "Number of Dams in US by County")

pip_plot(point_in_polygon(dams, v_conus, "id"), "Number of Dams in US by County Voroni")

pip_plot(point_in_polygon(dams, t_conus, "id"), "Number of Dams in US by County Triangulation")

pip_plot(point_in_polygon(dams, gs_conus, "id"), "Number of Dams in US by County Square Grid")

pip_plot(point_in_polygon(dams, gh_conus, "id"), "Number of Dams in US by County Hexagonal Grid")

##### Different tessellations create maps that present data very differently. Voroni and triangulation tessellations maintain county sizes more than the square or hexagonal grids, which obviously influences how many points are in each tile. While square or hexagonal tiles have equal area which may be useful in some cases, I chose to continue with the voroni tessellation because of how it better maintains the sizes of the counties, but doesn’t have as many points as the triangulation tessellation.

Question 4

A bar chart was created showing all the types of dams used in the US.

nid_classifier <- data.frame(
  abbr=c("I", "H", "C", "N", "S", "R", "P", "F", "D", "T", "O"),
  purpose=c("Irrigation", "Hydroelectric", "Flood Control", "Navigation", "Water Supply", "Recreation", "Fire Protection", "Fish and Wildlife", "Debris Control", "Tailings", "Other"))

dam_freq <- strsplit(dams$PURPOSES, split = "") %>%
  unlist() %>% 
  table() %>% 
  as.data.frame() %>% 
  setNames(c("abbr", "count")) %>% 
  left_join(nid_classifier) %>% 
  mutate(lab = paste0(purpose, "\n(", abbr, ")"))

dam_freq %>% 
  ggplot(aes(x = lab, y = count)) +
           geom_bar(stat = "identity") +
  coord_flip()

Plots were created for hydroelectric, fish and wildlife, irrigation, and fire protection dams. I chose these four because they were the most interesting to me and I was curious to see if the placement of the different types of dams would match my assumtions of where I thought they should be.

hydro = dams %>% 
  filter(grepl("H", PURPOSES))

fish = dams %>% 
  filter(grepl("F", PURPOSES))

irrigation = dams %>% 
  filter(grepl("I", PURPOSES))

fire = dams %>% 
  filter(grepl("P", PURPOSES))


pip_plot(point_in_polygon(hydro, v_conus, "id"), "Hydro Dams in US") + 
  gghighlight(n > (mean(n) + sd(n)))

pip_plot(point_in_polygon(fish, v_conus, "id"), "Fish and Wildlife Dams in US") + 
  gghighlight(n > (mean(n) + sd(n)))

pip_plot(point_in_polygon(irrigation, v_conus, "id"), "Irrigation Dams in US") + 
  gghighlight(n > (mean(n) + sd(n)))

pip_plot(point_in_polygon(fire, v_conus, "id"), "Fire Protection Dams in US") + 
  gghighlight(n > (mean(n) + sd(n)))