Install packages and retrieve COVID data.
library(tidyverse)
library(knitr)
library(readxl)
library(zoo)
url = "https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv"
covid = read_csv(url)
Filter California data, calculate new cases per day.
dat <- covid %>%
filter(state == "California") %>%
group_by(county) %>%
mutate(newCases = cases - lag(cases)) %>%
ungroup() %>%
filter(date == max(date))
most_cases <- dat %>%
slice_max(cases, n= 5) %>%
select(county, cases)
knitr::kable(most_cases,
caption = "Most Cases California Counties",
col.names = c("County", "Cases"),
format.args = list(big.mark = ","))
Most Cases California Counties
Los Angeles |
225,827 |
Riverside |
48,630 |
Orange |
44,507 |
San Bernardino |
42,947 |
San Diego |
35,439 |
most_new_cases <- dat %>%
slice_max(newCases, n = 5) %>%
select(county, newCases)
knitr::kable(most_new_cases,
caption = "Most New Cases California Counties",
col.names = c("County", "New Cases"),
format.args = list(big.mark = ","))
Most New Cases California Counties
Los Angeles |
1,796 |
San Bernardino |
797 |
Riverside |
430 |
Alameda |
382 |
Sonoma |
314 |
Load population estimate from census data.
library(readxl)
PopulationEstimates <- read_excel("data/PopulationEstimates.xls",
skip = 2)
names(PopulationEstimates)
dim(PopulationEstimates)
nrow(PopulationEstimates)
str(PopulationEstimates)
Join California COVID data with census population data.
PopulationEstimates <- rename(PopulationEstimates, fips = FIPStxt)
dat_pop <- left_join(dat, PopulationEstimates, by = "fips")
Show most new cases in California counties per capita
dat_pop_casespercap <- mutate(dat_pop, cpc = cases/POP_ESTIMATE_2019 * 100)
dat_pop_top5 <- dat_pop_casespercap %>%
slice_max(cpc, n = 5) %>%
select(county, cpc, cases, POP_ESTIMATE_2019)
knitr::kable(dat_pop_top5,
caption = "Most New Cases California Counties Per Capita",
col.names = c("County", "CPC", "Cases", "Population 2019"),
format.args = list(big.mark = ","))
Most New Cases California Counties Per Capita
Imperial |
5.686064 |
10,304 |
181,215 |
Kings |
3.681836 |
5,631 |
152,940 |
Kern |
3.019767 |
27,184 |
900,202 |
Tulare |
2.753354 |
12,836 |
466,195 |
Merced |
2.603356 |
7,229 |
277,680 |
Show most new cases by California county.
dat_pop_newtot5 <- dat_pop_casespercap %>%
slice_max(newCases, n = 5) %>%
select(county, newCases)
knitr::kable(dat_pop_newtot5,
caption = "Most New Cases California Counties",
col.names = c("County", "New Cases"),
format.args = list(big.mark = ","))
Most New Cases California Counties
Los Angeles |
1,796 |
San Bernardino |
797 |
Riverside |
430 |
Alameda |
382 |
Sonoma |
314 |
Show new cases per 100,000 people.
dat14 <- covid %>%
filter(state == "California") %>%
group_by(county) %>%
mutate(newCases = cases - lag(cases)) %>%
ungroup() %>%
filter(date > max(date) - 14)
dat14_pop <- left_join(dat14, PopulationEstimates, by = "fips")
dat14_pop_sum <- dat14_pop %>%
group_by(county) %>%
summarise(tot_new = sum(newCases), new100k = sum(newCases) / POP_ESTIMATE_2019 * 100000) %>%
unique()
knitr::kable(dat14_pop_sum,
caption = "New Cases Per 100,000 People",
col.names = c("County", "New Cases", "New Per 100k"),
format.args = list(big.mark = ","))
New Cases Per 100,000 People
Alameda |
3,700 |
221.38071 |
Alpine |
0 |
0.00000 |
Amador |
62 |
155.96700 |
Butte |
465 |
212.14859 |
Calaveras |
45 |
98.02854 |
Colusa |
62 |
287.74307 |
Contra Costa |
3,562 |
308.79235 |
Del Norte |
22 |
79.10255 |
El Dorado |
175 |
90.74740 |
Fresno |
5,044 |
504.85386 |
Glenn |
77 |
271.19360 |
Humboldt |
78 |
57.53995 |
Imperial |
758 |
418.28767 |
Inyo |
65 |
360.33040 |
Kern |
5,751 |
638.85661 |
Kings |
1,178 |
770.23669 |
Lake |
64 |
99.40049 |
Lassen |
71 |
232.23105 |
Los Angeles |
27,915 |
278.06258 |
Madera |
1,075 |
683.29022 |
Marin |
642 |
248.04309 |
Mariposa |
10 |
58.12940 |
Mendocino |
223 |
257.06348 |
Merced |
2,469 |
889.15298 |
Modoc |
1 |
11.31094 |
Mono |
14 |
96.92606 |
Monterey |
1,736 |
399.94379 |
Napa |
275 |
199.64572 |
Nevada |
72 |
72.17683 |
Orange |
6,376 |
200.77514 |
Placer |
665 |
166.94742 |
Plumas |
8 |
42.53735 |
Riverside |
9,639 |
390.15667 |
Sacramento |
4,516 |
290.96851 |
San Benito |
178 |
283.40339 |
San Bernardino |
8,710 |
399.52571 |
San Diego |
4,512 |
135.15740 |
San Francisco |
1,447 |
164.14289 |
San Joaquin |
3,292 |
431.93710 |
San Luis Obispo |
592 |
209.10526 |
San Mateo |
1,563 |
203.89448 |
Santa Barbara |
917 |
205.37560 |
Santa Clara |
3,957 |
205.25435 |
Santa Cruz |
344 |
125.90909 |
Shasta |
111 |
61.63927 |
Sierra |
5 |
166.38935 |
Siskiyou |
25 |
57.41978 |
Solano |
1,059 |
236.57245 |
Sonoma |
1,571 |
317.80004 |
Stanislaus |
3,576 |
649.40254 |
Sutter |
305 |
314.52702 |
Tehama |
98 |
150.57464 |
Trinity |
3 |
24.42002 |
Tulare |
2,758 |
591.59794 |
Tuolumne |
22 |
40.38327 |
Ventura |
1,356 |
160.28255 |
Yolo |
482 |
218.59410 |
Yuba |
268 |
340.67219 |
Show four state daily new cases and rolling seven day mean.
states4 <- covid %>%
filter(state %in% c("New York", "California", "Louisiana", "Florida")) %>%
group_by(state, county) %>%
mutate(newCases = cases - lag(cases),
dailySeven = rollmean(newCases, 7, fill=NA, align="right"))
ggplot(data = states4) +
geom_col(aes(x = date, y = newCases), fill = "darkred") +
geom_line(aes(x = date, y = dailySeven), color = "plum") +
labs(title = "Four State Daily New Cases and Rolling Seven Day Mean",
x = "Date",
y = "Cases") +
facet_wrap(~state, scales = "free") +
theme_minimal()
Show four state daily new cases and rolling seven day mean per capita.
states4_pop <- left_join(states4, PopulationEstimates, by = "fips") %>%
mutate(newcasespercapita = newCases/POP_ESTIMATE_2019 * 100,
dailySevenpercapita = dailySeven / POP_ESTIMATE_2019 * 100)
ggplot(data = states4_pop) +
geom_col(aes(x = date, y = newcasespercapita), fill = "sienna1") +
geom_line(aes(x = date, y = dailySevenpercapita), color = "olivedrab") +
labs(title = "Four State Daily New Cases and Rolling Seven Day Mean Per Capita",
x = "Date",
y = "Cases") +
facet_wrap(~state, scales = "free") +
theme_minimal()