library(foreign)
library(psych)
library(dplyr)
library(tidyr)
library(ggplot2)
library(ggpubr)
library(egg)
library(survey)
library(forcats)
library(egg)
library(lubridate)
Let’s take a quick look at our data. We have ATUS data from 2019 to 2021. ATUS, initiated in 2003, employs a computer-assisted telephone interview method to collect information from a nationally representative sample of the U.S. noninstitutionalized population. One individual aged at least 15 years old in each selected household retrospectively reported daily activities and corresponding time intervals throughout the 24 hours. Data contain basic demographic information and time (in minutes) spent on each of the following activities:
ATUS<-read.dta("/Users/jiaoyu/Documents/Ph.D/projects/ATUS/data/atus_R.dta",convert.factors = F)
head(ATUS)
## year caseid region statefip county hh_numkids month age engage engage_n
## 1 2019 2.01901e+13 3 48 48000 0 1 85 0 0
## 2 2019 2.01901e+13 2 26 26000 1 1 25 0 0
## 3 2019 2.01901e+13 1 36 36047 0 1 20 0 0
## 4 2019 2.01901e+13 1 36 36005 0 1 61 0 0
## 5 2019 2.01901e+13 1 50 50000 1 1 34 55 55
## 6 2019 2.01901e+13 1 25 25017 0 1 53 192 192
## volunteer_h2 volunteer_nh2 religion_h2 religion_nh2 civic_h2 civic_nh2
## 1 0 0 0 240 0 0
## 2 0 0 0 0 0 0
## 3 0 0 0 0 0 0
## 4 0 0 0 0 0 0
## 5 0 0 0 0 0 0
## 6 90 20 0 0 0 0
## social_leisure_nh2 social_leisure_h2 education_nh2 education_h2 phone_h2
## 1 0 0 0 0 0
## 2 0 0 0 0 0
## 3 0 0 0 0 0
## 4 0 0 0 0 0
## 5 35 0 0 0 0
## 6 0 40 0 0 60
## phone_nh2 engage_h_not engage_nh_not tv volunteer_h1 volunteer_nh1
## 1 0 0 240 NA 0 0
## 2 0 0 0 NA 0 0
## 3 0 0 0 NA 0 0
## 4 0 0 0 NA 0 0
## 5 0 0 35 NA 0 0
## 6 0 190 20 NA 90 102
## religion_h1 religion_nh1 civic_h1 civic_nh1 social_leisure_nh1
## 1 0 300 0 0 0
## 2 0 0 0 0 0
## 3 0 0 0 0 0
## 4 0 0 0 0 0
## 5 0 0 0 0 55
## 6 0 0 0 0 0
## social_leisure_h1 education_nh1 education_h1 phone_h1 phone_nh1 engage_h
## 1 0 0 0 0 0 0
## 2 0 0 0 0 0 0
## 3 0 0 0 0 0 0
## 4 0 0 0 0 0 0
## 5 0 0 0 0 0 0
## 6 40 0 0 60 0 190
## engage_nh weekday season female mar foreignborn edu4 employ spouse race4
## 1 300 0 4 1 0 0 3 2 0 2
## 2 0 1 4 1 0 0 1 1 1 3
## 3 0 1 4 1 0 0 3 1 0 3
## 4 0 1 4 1 1 0 3 2 1 3
## 5 55 0 4 0 1 0 4 1 1 1
## 6 102 1 4 1 0 0 4 1 1 1
## weight wtn weight_inter ym civic_edu_h civic_edu_nh diffany age3
## 1 2286292 2041308 2286292 2019.1 0 0 2 3
## 2 53729032 47103356 53729032 2019.1 0 0 1 25
## 3 23789098 20895104 23789098 2019.1 0 0 1 20
## 4 22241500 28980104 22241500 2019.1 0 0 1 1
## 5 2599756 2955964 2599756 2019.1 0 0 1 34
## 6 17223160 17129340 17223160 2019.1 0 0 1 53
## p_h p_nh
## 1 56.76189 76.43527
## 2 24.11634 28.87053
## 3 18.66152 38.83329
## 4 36.93901 66.17111
## 5 16.37354 54.14193
## 6 27.95344 44.00770
A summary of in-home and out-of-home social engagement time across three years (2019-2021).
library(ggalluvial)
df<-ATUS%>%filter(age>59)%>%select("engage_h","engage_nh", "year", "race4", "weight", "month", "caseid")
library(survey)
sydata<-svydesign(id=~caseid, weights=~weight, data=df)
datal<-svyby(~engage_h + engage_nh, ~year, sydata, svymean, keep.var=TRUE)
names(datal)<-c("year", "engage.1", "engage.2", "se.1","se.2")
al<-reshape(datal,
direction = "long",
idvar = "year", # i
timevar = "group", # j
varying = c("engage.1", "engage.2", "se.1","se.2"))
al$group<-factor(al$group, levels=c(1,2), labels=c("At home", "Outside home"))
ggplot(al, aes(axis1 = year, axis2 = group, y = engage)) +
geom_alluvium(aes(fill =group)) +
geom_stratum( alpha = .5) +
scale_x_discrete(expand = c(.1, .1)) +
scale_fill_viridis_d() +
theme_minimal() +
geom_text(stat = "stratum",
aes(label = after_stat(stratum))) +
scale_x_discrete(limits = c("year", "group"),
expand = c(0.15, 0.05)) +
theme_void()+
geom_text(aes(label = paste0(round(engage, 0), "min")), stat = "flow", nudge_x = -.25, alpha=0.7, size=3.5) +
labs(title = "",
# subtitle = "",
x = NULL,
fill = NULL,
y = "") # the \n adds a line break
Here we present a line plot to show how older adults use their time before and over the course of the pandemic. Note, due to the impact of the pandemic, data collection was paused from March 18, 2020, to May 9, 2020. We actually have a missing month for April 2020. Time spent outside home plummeted since the national lockdown.
df<-ATUS%>%filter(age>59)%>%select("engage_h","engage_nh", "year", "race4", "weight", "month", "caseid", "female", "edu4" )%>%mutate(ym=format(lubridate::ym(paste0(year,month)), "%Y.%m"), eg=engage_h+engage_nh)
df$ym<-as.factor(df$ym)
library(survey)
sydata<-svydesign(id=~caseid, weights=~weight, data=df)
datline<-svyby(~engage_h + engage_nh+eg, ~ym, sydata,svymean, keep.var=TRUE)
names(datline)<-c("ym", "engage.1", "engage.2","engage.3", "se.1","se.2", "se.3")
line<-reshape(datline,
direction = "long",
idvar = "ym", # i
timevar = "group", # j
varying = c("engage.1", "engage.2", "engage.3","se.1","se.2","se.3"))
line$group<-factor(line$group, levels=c(1,2,3), labels=c("In home", "Out-of-home", "Total"))
line$upper<-line$engage+1.96*line$se
line$lower<-line$engage-1.96*line$se
line$time<-seq(1,nrow(line), 1)
lplot<-ggplot(data=line, aes(x = ym, y = engage, color= group, group= group)) +
#geom_rect(aes(xmin='2019.01',
# xmax = '2019.12',
# ymin = -Inf,
# ymax = Inf), fill = "grey85", alpha = 0.03) +
#geom_rect(aes(xmin='2021.01',
# xmax = '2021.12',
# ymin = -Inf,
# ymax = Inf), fill = "grey85", alpha = 0.03) +
geom_line( size=0.5) +
geom_point( size = 1.5)+
#geom_ribbon(aes(ymin=lower, ymax=upper), linetype=2, alpha=0.01)+
geom_vline(xintercept = "2020.05",
colour = "grey40",
linetype = 2)+ # vertical line
# geom_vline(xintercept = "2021.08",
# colour = "grey40",
#linetype = 2)+ # vertical line
labs( x= "Month" , y= 'Time (min)',title= "", color="", type="", size=8)+
hrbrthemes::theme_ipsum()+
theme(axis.text.x = element_text(angle = 90, vjust=0.5, hjust=1, size=5), legend.title = element_blank(),legend.text = element_text(size=8))+
scale_color_manual(values = c("#D16103", "#293352","#9999CC")) +
coord_cartesian(ylim = c(0,100))
Here gganimate
gives us a nice animation plot.
library(gganimate)
lplot+geom_point() +
transition_reveal(time)
df<-ATUS%>%filter(age>59 )%>%select("engage_h","engage_nh", "year", "race4", "weight", "month", "caseid", "race4")
df<-df%>%mutate(ym=format(lubridate::ym(paste0(df$year,df$month)), "%Y.%m"))
df$ym<-as.factor(df$ym)
library(survey)
sydata<-svydesign(id=~caseid, weights=~weight, data=df)
datline<-svyby(~engage_h + engage_nh, ~ym+race4, sydata,svymean, keep.var=TRUE)
names(datline)<-c("ym", "race4", "engage.1", "engage.2", "se.1","se.2")
line<-reshape2::melt(datline, id.vars =c("ym", "race4"), measure.vars= c("engage.1", "engage.2"))
line$group<-factor(line$variable, levels=c("engage.1","engage.2"), labels=c("At home", "Outside home"))
line$race4<-factor(line$race4, levels=c(1:4), labels=c("White", "Black", "Hispanic", "Other"))
`
ggplot(data=line, aes(x = ym, y = value, color= factor(race4),group= factor(race4))) +
geom_line(size=0.7) +
#geom_ribbon(aes(ymin=lower, ymax=upper), linetype=2, alpha=0.01)+
labs( x= "Month" , y= 'Time (min)',title= "", color="", type="")+
theme_article()+
theme(axis.text.x = element_text(angle = 90, vjust=0.5, hjust=1), legend.title = element_blank())+facet_wrap(~group)
The pattern for out of home engagement is consistent across racial groups. We see a decline in 2020 and a recovery in 2021. The magnitude differs for different racial groups. In home activity time seems increases for Blacks and Other racial groups (almost Asian), But not for Whites or Hispanics.
datb<-svyby(~engage_h + engage_nh, ~year+race4, sydata,svymean, keep.var=TRUE)
ggplot(datb, aes(x = factor(race4), y =engage_nh, fill=factor(year))) +
geom_bar(stat="identity", color="black", width = 0.7,
position=position_dodge(0.7)) +
# scale_fill_manual(values=c("grey30", "grey42", "grey76"))+
geom_errorbar(aes(ymin=engage_nh-1.96*se.engage_nh, ymax=engage_nh+1.96*se.engage_nh), width=.2,
position=position_dodge(0.7))+ theme_article()+
#geom_text(aes(label = round(engage_nh, 1)), size = 3, position = position_dodge(0.7), vjust = -0.5)+
theme(plot.caption = element_text(hjust = 0, vjust=7))+
#scale_x_discrete(labels= c("Male", "Female" ))+
#scale_x_discrete(labels= c(">HS", "HS", "Some college", "college+"))+
scale_x_discrete(labels= c("White", "Black", "Hispanic", "Other"))+
labs( x= " " , y= "Time",title= "Social engagement (outside home) by race", fill="Year", caption = "Error bar: 95% CI")
ggplot(datb, aes(x = factor(race4), y =engage_h, fill=factor(year))) +
geom_bar(stat="identity", color="black", width = 0.7,
position=position_dodge(0.7)) +
#scale_fill_manual(values=c("grey30", "grey42", "grey76"))+
geom_errorbar(aes(ymin=engage_h-1.96*se.engage_h, ymax=engage_h+1.96*se.engage_h), width=.2,
position=position_dodge(0.7))+ theme_article()+
theme(plot.caption = element_text(hjust = 0, vjust=7))+
#scale_x_discrete(labels= c("Male", "Female" ))+
#scale_x_discrete(labels= c(">HS", "HS", "Some college", "college+"))+
scale_x_discrete(labels= c("White", "Black", "Hispanic", "Other"))+
labs( x= " " , y= "Time",title= "Social engagement (at home) by race", fill="Year", caption = "Error bar: 95% CI")
ATUS<-read.dta("/Users/jiaoyu/Documents/Ph.D/projects/ATUS/data/atus_R.dta",convert.factors = T)
df<-ATUS%>%filter(age>59)%>%select("p_h","p_nh", "year", "race4")%>%mutate(year=as.factor(year))
databox<-reshape2::melt(df, id.vars = c("year", "race4"))%>%mutate(group=factor(variable, levels=c("p_h","p_nh"), labels=c("At home", "Outside home")))
ggplot(databox, aes(x = race4, y =value, color= year))+
stat_boxplot(geom = "errorbar", width = 0.5)+
geom_boxplot(width=0.5)+facet_wrap(~group)+
labs( x= "" , y= "Predicted time of social engagement(min)",title= NULL, fill="")+theme_article()+ theme(legend.position = "bottom")+
scale_color_manual(values = c("#D16103", "#293352","#4E84C4")) +
theme(plot.margin = unit(c(1, 2, 1, 1), "lines"),
axis.title.x = element_blank(),
axis.text.x = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank())
library(readxl) ## read multiple datas in one excel
read_excel_allsheets <- function(filename, tibble = FALSE) {
# I prefer straight data.frames
# but if you like tidyverse tibbles (the default with read_excel)
# then just pass tibble = TRUE
sheets <- readxl::excel_sheets(filename)
x <- lapply(sheets, function(X) readxl::read_excel(filename, sheet = X))
if(!tibble) x <- lapply(x, as.data.frame)
names(x) <- sheets
x
}
mysheets <- read_excel_allsheets("/Users/jiaoyu/Documents/Ph.D/projects/ATUS/data/activity_change.xlsx")
library(scales)
datawater<-mysheets$Sheet1
datachange<-mysheets$Sheet2
datachange<-datachange%>%mutate(per=percent(as.numeric(datachange$percent)), per=as.numeric(datachange$percent))
Waterfall plots can easily show changes across years. Here, we show how time spent on social and leisure activities changeS during the pandemic at in-home and out-of-home settings.
library(waterfalls)
water<-datawater%>%filter(activity=="Social and leisure activities" & place== "home")%>%select(year, value)%>%round(., 2)
water$year<-factor(water$year, levels= c(2019, 2020, 2021))
waterfall(water, put_rect_text_outside_when_value_below=0.5, rect_width = 0.4)+
theme_classic()+
labs(title="At Home-social and leisure activities",x = '', y = 'Time (min)')
water<-datawater%>%filter(activity=="Social and leisure activities" & place== "nhome")%>%select(year, value)%>% round(., 2)
water$year<-factor(water$year, levels= c (2019, 2020, 2021))
waterfall(water, put_rect_text_outside_when_value_below=0.5,rect_width = 0.4, fill_colours=c("#D16103","#4E84C4"))+
theme_classic()+
labs(title="Outside Home-social and leisure activities",x = '', y = '')
Another way to show changes across time using lollipop plots. Most in-home activity time changes positively, i.e. increases in time spent at home. Most out-of-home activity time chanegs negatively, i.e. decreases in time spent outside home.
library(ggstance)
datachange$year<-factor(datachange$year, levels=c(2020, 2021), labels=c(2020, 2021))
datachange$activity<-factor(datachange$activity, levels=c("Civic obligations and education", "Telephone call", "Religious and spiritual activities","Volunteer activities","Social and leisure activities"), labels=c("Civic obligations and education", "Telephone call", "Religious and spiritual activities","Volunteer activities","Social and leisure activities"))
ggplot(datachange, aes(y=activity, x=percent*100, colour=factor(year))) +
ggstance::geom_pointrangeh(aes(xmin=0, xmax=percent*100), position=position_dodgev(height=-0.5),
linetype=ifelse(datachange$type=="p", "dashed", "solid")
)+
geom_vline(xintercept = 0,
colour = "grey60",
linetype = 2)+ # vertical line
facet_wrap(~place,ncol=1)+
theme_article()+
labs(color = "Year", x= "Changes of time (%)", y ="",caption = "Note:The percentage of changes in social engagement time at 2010 and 2021, compared to 2019.\n ")+
scale_color_manual(values=c("#D16103","#4E84C4"))+
scale_x_continuous(breaks=c(-80,-60, -40, -20, 0, 20, 40, 60, 100, 150),
labels=c( "-80", "-60%", "-40%","-20%" , "0", "20%", "40%", "60%","100%", "300%"))+theme(axis.text.x = element_text(size=8))
You may also interested in finding state differences in how older adults spent their time before and during the pandemic. We can see a reduction of time spent on outside home activities in 2020, the pandemic year compared to 2019, the pre-pandemic year.
#map by states
library(mapdata)
library(viridis)
library(ggthemes)
library(RColorBrewer)
ATUS<-read.dta("/Users/jiaoyu/Documents/Ph.D/projects/ATUS/data/atus_R.dta",convert.factors = T)
states<-map_data("state")
df<-ATUS%>%filter(age>59)%>%select("engage_h","engage_nh","statefip", "weight", "caseid", "year")
df<-ATUS%>%select("engage_h","engage_nh","statefip", "weight", "caseid", "year")
colnames(df)[3]<-"region"
sydata<-svydesign(id=~caseid, weights=~weight, data=df)
map1<-svyby(~engage_h + engage_nh, ~region+year, sydata,svymean, keep.var=TRUE)
map19<-map1%>%filter(year==2019)
map20<-map1%>%filter(year==2020)
MergedStates19 <-states%>%left_join(map19, by = "region")
MergedStates20 <-states%>%left_join(map20, by = "region")
ggplot()+
geom_polygon( data=MergedStates19,
aes(x=long, y=lat, group=group, fill = engage_nh))+
scale_color_viridis()+
theme_map() +
#scale_fill_viridis(trans = "reverse")
scale_fill_distiller(palette = "RdYlBu", name = "At home")+labs(title= "Social engagement outside home by states-2019")
ggplot()+
geom_polygon( data=MergedStates20,
aes(x=long, y=lat, group=group, fill = engage_nh))+
scale_color_viridis()+
theme_map() +
#scale_fill_viridis(trans = "reverse")
scale_fill_distiller(palette = "RdYlBu", name = "Outside home")+labs(title= "Social engagement outside home by states-2020")