今天上午不是很忙,贴一个之前用R做的latent growth modeling(潜变量增长模型)。项目背景是给100多个学生做了12次的干预,想看一下有没有什么因素对干预效果有影响。
没有时间加太多注释,大家将就着看一下吧。 根本方法还是用lavaan定义model,这次是把截距 i 和 斜率 s 设定为潜变量,用 不同时间点的测量结果做估计。
教学视频的话可以看这里
之前写过一个用R做结构方程模型 的帖子,是我所有文章里阅读量最高的(已经快3000了!),如果这个文章阅读量很高的话,应该说明大家很需要这方面的教学,我会考虑再写一篇稍微详细一点文章介绍一下步骤。不过对于没有R基础的同学,我还是建议稍微学一下R,这样看到下边的代码应该就已经知道怎么做自己的分析了。
Linear regression and latent growth modeling
Hanchao
June 18, 2018
# 这里有个很重要的一次向加载很多个package的包,可以作为一个常用模板
pacman::p_load(readxl,haven,
dplyr,tidyr, purrr, broom,
ggplot2, corrplot, semPlot,
psych,lavaan, mice)
ggplot(minilit,aes(x=Attend_Percentage,y=WARL_Change,color=Gender))+
geom_point(alpha=0.8,position="jitter")+
geom_smooth(method="lm")
ml_orig <- read_excel("C:/Users/houh1/OneDrive - The University of Melbourne/MiniLit/MiniLit_Path_Data_Clean_20180109 for R analysis.xlsx",sheet = 3)
ml_long <- ml_orig %>%
gather(time,WARL_score,c(11:23))
ml_long$time <- factor(ml_long$time, levels=c("T0","T1", "T2", "T3", "T4",
"T5", "T6", "T7", "T8",
"T9", "T10", "T11", "T12")
)
ml_long <- mutate(ml_long,time_num=as.numeric(time)) #long data to plot
ml_long %>%
ggplot(aes(x=time,y=WARL_score))+
geom_boxplot()
## Warning: Removed 547 rows containing non-finite values (stat_boxplot).
ml_long %>%
filter(!is.na(WARL_score) & WARL == "Y")%>%
ggplot(aes(x=time_num,y=WARL_score))+
geom_point(alpha=0.5)+
geom_smooth(method="lm",se=FALSE)+
facet_wrap(~RCT_ID)
ml_long %>%
filter(!is.na(WARL_score) & WARL == "Y")%>%
ggplot(aes(x=time_num,y=WARL_score,color= factor(RCT_ID)))+
# geom_point(alpha=0.5)+
geom_smooth(method="lm",se=FALSE,size=0.2,show.legend = FALSE)
ml_model_use <- ml_orig %>%
filter(WARL=="Y")%>%
select(10:23,54)
cor(ml_model_use$Attend_Percentage,ml_model_use$TS_Student_Engage, use="complete.obs")
## [1] 0.2046037
summary(ml_model_use)
## Attend_Percentage T0 T1 T2
## Min. :0.08333 Min. : 0.00 Min. : 1.00 Min. : 3.00
## 1st Qu.:0.73148 1st Qu.: 8.00 1st Qu.:13.25 1st Qu.:17.00
## Median :0.79630 Median :12.00 Median :18.50 Median :21.00
## Mean :0.78895 Mean :11.38 Mean :20.85 Mean :23.97
## 3rd Qu.:0.86730 3rd Qu.:15.00 3rd Qu.:26.75 3rd Qu.:30.00
## Max. :0.98058 Max. :17.00 Max. :52.00 Max. :54.00
## NA's :1 NA's :5
## T3 T4 T5 T6
## Min. : 5.00 Min. : 5.00 Min. :10.00 Min. : 6.00
## 1st Qu.:17.00 1st Qu.:21.00 1st Qu.:23.50 1st Qu.:22.00
## Median :25.00 Median :27.00 Median :33.00 Median :31.00
## Mean :24.79 Mean :29.02 Mean :34.96 Mean :32.16
## 3rd Qu.:30.00 3rd Qu.:34.00 3rd Qu.:43.50 3rd Qu.:42.00
## Max. :51.00 Max. :62.00 Max. :75.00 Max. :76.00
## NA's :32 NA's :2 NA's :68 NA's :18
## T7 T8 T9 T10
## Min. : 6.00 Min. : 9.00 Min. :12.00 Min. : 9.00
## 1st Qu.:27.00 1st Qu.:30.00 1st Qu.:31.00 1st Qu.:29.50
## Median :33.00 Median :37.00 Median :37.00 Median :42.00
## Mean :35.82 Mean :38.71 Mean :36.39 Mean :43.14
## 3rd Qu.:42.00 3rd Qu.:46.50 3rd Qu.:42.75 3rd Qu.:54.75
## Max. :99.00 Max. :92.00 Max. :69.00 Max. :90.00
## NA's :1 NA's :29 NA's :21
## T11 T12 TS_Student_Engage
## Min. :15.00 Min. : 8.00 Min. :3.000
## 1st Qu.:29.25 1st Qu.:31.00 1st Qu.:3.167
## Median :43.00 Median :41.00 Median :3.500
## Mean :43.74 Mean :43.83 Mean :3.524
## 3rd Qu.:53.00 3rd Qu.:55.00 3rd Qu.:4.000
## Max. :88.00 Max. :93.00 Max. :4.000
## NA's :37 NA's :19 NA's :30
# Missing value imputation
imp <- mice(ml_model_use)
ml_cpl <- complete(imp)
ml_cpl <- ml_cpl %>%
filter(Attend_Percentage >0.5)
model1 <- ' i =~ 1*T0+1*T1+1*T2+1*T3+1*T4+1*T5+1*T6+1*T7+1*T8+1*T9+1*T10+1*T11+1*T12
s =~ 0*T0+ 1*T1+ 2*T2+ 3*T3+ 4*T4+ 5*T5+ 6*T6+ 7*T7+ 8*T8+ 9*T9+ 10*T10+ 11*T11+ 12*T12
'
model_var <- ' i =~ 1*T0+1*T1+1*T2+1*T3+1*T4+1*T5+1*T6+1*T7+1*T8+1*T9+1*T10+1*T11+1*T12
s =~ 0*T0+ 1*T1+ 2*T2+ 3*T3+ 4*T4+ 5*T5+ 6*T6+ 7*T7+ 8*T8+ 9*T9+ 10*T10+ 11*T11+ 12*T12
i ~ Attend_Percentage + TS_Student_Engage
s ~ Attend_Percentage + TS_Student_Engage
'
fit1 <- growth(model1, data=ml_cpl)
fit_var <- growth(model_var, data=ml_cpl)
summary(fit1,fit.measures=T)
## lavaan (0.6-1) converged normally after 86 iterations
##
## Number of observations 93
##
## Estimator ML
## Model Fit Test Statistic 598.147
## Degrees of freedom 86
## P-value (Chi-square) 0.000
##
## Model test baseline model:
##
## Minimum Function Test Statistic 1932.696
## Degrees of freedom 78
## P-value 0.000
##
## User model versus baseline model:
##
## Comparative Fit Index (CFI) 0.724
## Tucker-Lewis Index (TLI) 0.750
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -3985.910
## Loglikelihood unrestricted model (H1) -3686.837
##
## Number of free parameters 18
## Akaike (AIC) 8007.821
## Bayesian (BIC) 8053.407
## Sample-size adjusted Bayesian (BIC) 7996.585
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.253
## 90 Percent Confidence Interval 0.234 0.272
## P-value RMSEA <= 0.05 0.000
##
## Standardized Root Mean Square Residual:
##
## SRMR 1.118
##
## Parameter Estimates:
##
## Information Expected
## Information saturated (h1) model Structured
## Standard Errors Standard
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|)
## i =~
## T0 1.000
## T1 1.000
## T2 1.000
## T3 1.000
## T4 1.000
## T5 1.000
## T6 1.000
## T7 1.000
## T8 1.000
## T9 1.000
## T10 1.000
## T11 1.000
## T12 1.000
## s =~
## T0 0.000
## T1 1.000
## T2 2.000
## T3 3.000
## T4 4.000
## T5 5.000
## T6 6.000
## T7 7.000
## T8 8.000
## T9 9.000
## T10 10.000
## T11 11.000
## T12 12.000
##
## Covariances:
## Estimate Std.Err z-value P(>|z|)
## i ~~
## s 1.417 0.841 1.684 0.092
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|)
## .T0 0.000
## .T1 0.000
## .T2 0.000
## .T3 0.000
## .T4 0.000
## .T5 0.000
## .T6 0.000
## .T7 0.000
## .T8 0.000
## .T9 0.000
## .T10 0.000
## .T11 0.000
## .T12 0.000
## i 19.140 0.991 19.309 0.000
## s 2.099 0.088 23.964 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .T0 111.412 17.079 6.523 0.000
## .T1 18.699 3.450 5.419 0.000
## .T2 13.536 2.547 5.315 0.000
## .T3 12.306 2.232 5.512 0.000
## .T4 14.522 2.463 5.896 0.000
## .T5 53.567 8.129 6.590 0.000
## .T6 24.247 3.857 6.287 0.000
## .T7 23.761 3.841 6.186 0.000
## .T8 46.793 7.290 6.419 0.000
## .T9 18.008 3.290 5.474 0.000
## .T10 33.597 5.700 5.895 0.000
## .T11 40.386 6.916 5.839 0.000
## .T12 41.140 7.335 5.609 0.000
## i 85.001 13.409 6.339 0.000
## s 0.552 0.105 5.257 0.000
semPaths(fit_var)