Install packages and retrieve COVID data.

library(tidyverse)
library(knitr)
library(readxl)
library(zoo)

url = "https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv"

covid = read_csv(url)

Filter California data, calculate new cases per day.

dat <- covid %>% 
  filter(state == "California") %>% 
  group_by(county) %>% 
  mutate(newCases = cases - lag(cases)) %>% 
  ungroup() %>% 
  filter(date == max(date))

most_cases <- dat %>% 
  slice_max(cases, n= 5) %>% 
  select(county, cases)

knitr::kable(most_cases, 
             caption = "Most Cases California Counties",
             col.names = c("County", "Cases"),
             format.args = list(big.mark = ","))
Most Cases California Counties
County Cases
Los Angeles 225,827
Riverside 48,630
Orange 44,507
San Bernardino 42,947
San Diego 35,439
most_new_cases <- dat %>% 
  slice_max(newCases, n = 5) %>% 
  select(county, newCases)

knitr::kable(most_new_cases, 
             caption = "Most New Cases California Counties",
             col.names = c("County", "New Cases"),
             format.args = list(big.mark = ","))
Most New Cases California Counties
County New Cases
Los Angeles 1,796
San Bernardino 797
Riverside 430
Alameda 382
Sonoma 314

Load population estimate from census data.

library(readxl)
PopulationEstimates <- read_excel("data/PopulationEstimates.xls", 
                                  skip = 2)
names(PopulationEstimates)
dim(PopulationEstimates)
nrow(PopulationEstimates)
str(PopulationEstimates)

Join California COVID data with census population data.

PopulationEstimates <- rename(PopulationEstimates, fips = FIPStxt)

dat_pop <- left_join(dat, PopulationEstimates, by = "fips")

Show most new cases in California counties per capita

dat_pop_casespercap <- mutate(dat_pop, cpc = cases/POP_ESTIMATE_2019 * 100)

dat_pop_top5 <- dat_pop_casespercap %>% 
  slice_max(cpc, n = 5) %>% 
  select(county, cpc, cases, POP_ESTIMATE_2019)

knitr::kable(dat_pop_top5, 
             caption = "Most New Cases California Counties Per Capita",
             col.names = c("County", "CPC", "Cases", "Population 2019"),
             format.args = list(big.mark = ","))
Most New Cases California Counties Per Capita
County CPC Cases Population 2019
Imperial 5.686064 10,304 181,215
Kings 3.681836 5,631 152,940
Kern 3.019767 27,184 900,202
Tulare 2.753354 12,836 466,195
Merced 2.603356 7,229 277,680

Show most new cases by California county.

dat_pop_newtot5 <- dat_pop_casespercap %>% 
  slice_max(newCases, n = 5) %>% 
  select(county, newCases)

knitr::kable(dat_pop_newtot5, 
             caption = "Most New Cases California Counties",
             col.names = c("County", "New Cases"),
             format.args = list(big.mark = ","))
Most New Cases California Counties
County New Cases
Los Angeles 1,796
San Bernardino 797
Riverside 430
Alameda 382
Sonoma 314

Show new cases per 100,000 people.

dat14 <- covid %>% 
  filter(state == "California") %>% 
  group_by(county) %>% 
  mutate(newCases = cases - lag(cases)) %>% 
  ungroup() %>% 
  filter(date > max(date) - 14)

dat14_pop <- left_join(dat14, PopulationEstimates, by = "fips")

dat14_pop_sum <- dat14_pop %>% 
  group_by(county) %>% 
  summarise(tot_new = sum(newCases), new100k = sum(newCases) / POP_ESTIMATE_2019 * 100000) %>% 
  unique()

knitr::kable(dat14_pop_sum, 
             caption = "New Cases Per 100,000 People",
             col.names = c("County", "New Cases", "New Per 100k"),
             format.args = list(big.mark = ","))
New Cases Per 100,000 People
County New Cases New Per 100k
Alameda 3,700 221.38071
Alpine 0 0.00000
Amador 62 155.96700
Butte 465 212.14859
Calaveras 45 98.02854
Colusa 62 287.74307
Contra Costa 3,562 308.79235
Del Norte 22 79.10255
El Dorado 175 90.74740
Fresno 5,044 504.85386
Glenn 77 271.19360
Humboldt 78 57.53995
Imperial 758 418.28767
Inyo 65 360.33040
Kern 5,751 638.85661
Kings 1,178 770.23669
Lake 64 99.40049
Lassen 71 232.23105
Los Angeles 27,915 278.06258
Madera 1,075 683.29022
Marin 642 248.04309
Mariposa 10 58.12940
Mendocino 223 257.06348
Merced 2,469 889.15298
Modoc 1 11.31094
Mono 14 96.92606
Monterey 1,736 399.94379
Napa 275 199.64572
Nevada 72 72.17683
Orange 6,376 200.77514
Placer 665 166.94742
Plumas 8 42.53735
Riverside 9,639 390.15667
Sacramento 4,516 290.96851
San Benito 178 283.40339
San Bernardino 8,710 399.52571
San Diego 4,512 135.15740
San Francisco 1,447 164.14289
San Joaquin 3,292 431.93710
San Luis Obispo 592 209.10526
San Mateo 1,563 203.89448
Santa Barbara 917 205.37560
Santa Clara 3,957 205.25435
Santa Cruz 344 125.90909
Shasta 111 61.63927
Sierra 5 166.38935
Siskiyou 25 57.41978
Solano 1,059 236.57245
Sonoma 1,571 317.80004
Stanislaus 3,576 649.40254
Sutter 305 314.52702
Tehama 98 150.57464
Trinity 3 24.42002
Tulare 2,758 591.59794
Tuolumne 22 40.38327
Ventura 1,356 160.28255
Yolo 482 218.59410
Yuba 268 340.67219

Show four state daily new cases and rolling seven day mean.

states4 <- covid %>% 
  filter(state %in% c("New York", "California", "Louisiana", "Florida")) %>% 
  group_by(state, county) %>%
  mutate(newCases = cases - lag(cases),
         dailySeven = rollmean(newCases, 7, fill=NA, align="right"))

ggplot(data = states4) +
  geom_col(aes(x = date, y = newCases), fill = "darkred") +
  geom_line(aes(x = date, y = dailySeven), color = "plum") +
  labs(title = "Four State Daily New Cases and Rolling Seven Day Mean",  
       x = "Date",  
       y = "Cases") +
  facet_wrap(~state, scales = "free") +
  theme_minimal()

Show four state daily new cases and rolling seven day mean per capita.

states4_pop <- left_join(states4, PopulationEstimates, by = "fips") %>% 
  mutate(newcasespercapita = newCases/POP_ESTIMATE_2019 * 100,
         dailySevenpercapita = dailySeven / POP_ESTIMATE_2019 * 100)

ggplot(data = states4_pop) +
  geom_col(aes(x = date, y = newcasespercapita), fill = "sienna1") +
  geom_line(aes(x = date, y = dailySevenpercapita), color = "olivedrab") +
  labs(title = "Four State Daily New Cases and Rolling Seven Day Mean Per Capita",  
       x = "Date",  
       y = "Cases") +
  facet_wrap(~state, scales = "free") +
  theme_minimal()