系列回顾:
ArchR官网教程学习笔记1:Getting Started with ArchR
ArchR官网教程学习笔记2:基于ArchR推测Doublet
ArchR官网教程学习笔记3:创建ArchRProject
ArchR官网教程学习笔记4:ArchR的降维
ArchR官网教程学习笔记5:ArchR的聚类
ArchR官网教程学习笔记6:单细胞嵌入(Single-cell Embeddings)
ArchR官网教程学习笔记7:ArchR的基因评分和Marker基因
ArchR官网教程学习笔记8:定义与scRNA-seq一致的聚类
ArchR官网教程学习笔记9:ArchR的伪批量重复
ArchR官网教程学习笔记10:ArchR的call peak
ArchR官网教程学习笔记11:鉴定Marker峰
ArchR官网教程学习笔记12:Motif和Feature富集
ArchR官网教程学习笔记13:ChromVAR偏差富集
ArchR官网教程学习笔记14:ArchR的Footprinting分析
ArchR官网教程学习笔记15:ArchR的整合分析
ArchR官网教程学习笔记16(上):ArchR的轨迹推断分析
这是ArchR官网手册的最后一部分,后面的更多功能还在开发中。
Lymphoid Trajectory - B 细胞分化
作为轨迹分析的第二个例子,我们将构建一条B细胞分化轨迹:从祖细胞(Progenitor)出发,经过共同淋巴祖细胞(CLP)和前B细胞(PreB),最终到达完全分化的B细胞。由于这一分析本质上是在重复上一节的流程,这里不再对代码逐一解释;如需详细的代码说明,请参阅上一节。
> p1 <- plotEmbedding(ArchRProj = projHeme5, colorBy = "cellColData", name = "Clusters", embedding = "UMAP")
> p2 <- plotEmbedding(ArchRProj = projHeme5, colorBy = "cellColData", name = "Clusters2", embedding = "UMAP")
> ggAlignPlots(p1, p2, type = "h")
(一)UMAP拟时间和独立特征绘图
> trajectory <- c("Progenitor", "CLP", "PreB", "B")
> trajectory
[1] "Progenitor" "CLP" "PreB" "B"
> projHeme5 <- addTrajectory(
ArchRProj = projHeme5,
name = "LymphoidU",
groupBy = "Clusters2",
trajectory = trajectory,
embedding = "UMAP",
force = TRUE
)
> head(projHeme5$LymphoidU[!is.na(projHeme5$LymphoidU)])
[1] 80.68850 79.93283 81.94794 83.75315 82.66163 81.61209
> p <- plotTrajectory(projHeme5, trajectory = "LymphoidU", colorBy = "cellColData", name = "LymphoidU")
> p[[1]]
> plotPDF(p, name = "Plot-LymphoidU-Traj-UMAP.pdf", ArchRProj = projHeme5, addDOC = FALSE, width = 5, height = 5)
> p1 <- plotTrajectory(projHeme5, trajectory = "LymphoidU", colorBy = "GeneScoreMatrix", name = "PAX5", continuousSet = "horizonExtra")
> p2 <- plotTrajectory(projHeme5, trajectory = "LymphoidU", colorBy = "GeneIntegrationMatrix", name = "PAX5", continuousSet = "blueYellow")
> ggAlignPlots(p1[[1]], p2[[1]], type = "h")
> ggAlignPlots(p1[[2]], p2[[2]], type = "h")
(二)拟时间热图
> trajMM <- getTrajectory(ArchRProj = projHeme5, name = "LymphoidU", useMatrix = "MotifMatrix", log2Norm = FALSE)
> p1 <- plotTrajectoryHeatmap(trajMM, pal = paletteContinuous(set = "solarExtra"))
> trajGSM <- getTrajectory(ArchRProj = projHeme5, name = "LymphoidU", useMatrix = "GeneScoreMatrix", log2Norm = TRUE)
> p2 <- plotTrajectoryHeatmap(trajGSM, pal = paletteContinuous(set = "horizonExtra"))
> trajGIM <- getTrajectory(ArchRProj = projHeme5, name = "LymphoidU", useMatrix = "GeneIntegrationMatrix", log2Norm = FALSE)
> p3 <- plotTrajectoryHeatmap(trajGIM, pal = paletteContinuous(set = "blueYellow"))
> trajPM <- getTrajectory(ArchRProj = projHeme5, name = "LymphoidU", useMatrix = "PeakMatrix", log2Norm = TRUE)
> p4 <- plotTrajectoryHeatmap(trajPM, pal = paletteContinuous(set = "solarExtra"))
> plotPDF(p1, p2, p3, p4, name = "Plot-LymphoidU-Traj-Heatmaps.pdf", ArchRProj = projHeme5, addDOC = FALSE, width = 6, height = 8)
(三)整合拟时间分析
> corGSM_MM <- correlateTrajectories(trajGSM, trajMM)
> corGSM_MM[[1]]$matchname1
[1] "CREM" "NFE2" "IRF8" "MAFG" "RFX2"
[6] "CEBPA" "SPIB" "FOSL2" "BCL11A" "FOXP1"
[11] "GATA2" "PAX5"
> corGSM_MM[[1]]
DataFrame with 12 rows and 12 columns
idx1 idx2 matchname1 matchname2 name1 name2 Correlation VarAssay1
<integer> <integer> <array> <array> <character> <character> <numeric> <numeric>
1 2503 978 CREM CREM chr10:CREM z:CREM_108 0.627829044418012 0.853720759285683
2 5181 989 NFE2 NFE2 chr12:NFE2 z:NFE2_119 0.774565931839941 0.974488692869806
3 8690 1503 IRF8 IRF8 chr16:IRF8 z:IRF8_633 0.507694340303834 0.869805854628789
4 10078 1018 MAFG MAFG chr17:MAFG-DT z:MAFG_148 0.562466520749052 0.958922471570026
5 10649 1594 RFX2 RFX2 chr19:RFX2 z:RFX2_724 0.69789030765449 0.973494184286764
... ... ... ... ... ... ... ... ...
8 12241 975 FOSL2 FOSL2 chr2:FOSL2 z:FOSL2_105 0.825674336982991 0.992562805378994
9 12398 1064 BCL11A BCL11A chr2:BCL11A z:BCL11A_194 0.606153441405121 0.88640982401522
10 15543 1223 FOXP1 FOXP1 chr3:FOXP1 z:FOXP1_353 0.536888802177499 0.948890906732391
11 15802 1258 GATA2 GATA2 chr3:GATA2 z:GATA2_388 0.590335879793854 0.906559432697713
12 21499 1579 PAX5 PAX5 chr9:PAX5 z:PAX5_709 0.733196281144466 0.982920396073853
VarAssay2 TStat Pval FDR
<numeric> <numeric> <numeric> <numeric>
1 0.885057471264368 7.98507143182915 2.72991368017422e-12 2.02758134245667e-10
2 0.989655172413793 12.1231529692724 3.28687596108806e-21 1.34268883010447e-18
3 0.975287356321839 5.8336624109903 6.99435032374487e-08 2.07795789618166e-06
4 0.936206896551724 6.73439692355051 1.12841066228852e-09 4.60955755544861e-08
5 0.923563218390805 9.64635100744245 7.08941391574889e-16 1.28712248203708e-13
... ... ... ... ...
8 0.997701149425287 14.4887028649413 4.19616643781117e-26 3.42826797969173e-23
9 0.974137931034483 7.54464082413095 2.34445039301945e-11 1.47339690084376e-09
10 0.87183908045977 6.29989802423 8.49671859337208e-09 2.9539655705468e-07
11 0.992528735632184 7.24025437563277 1.0192127271972e-10 5.74273653875939e-09
12 0.966091954022989 10.6736627799731 4.20029954662577e-18 1.14388157653108e-15
> trajGSM2 <- trajGSM[corGSM_MM[[1]]$name1, ]
> trajMM2 <- trajMM[corGSM_MM[[1]]$name2, ]
> trajCombined <- trajGSM2
> assay(trajCombined) <- t(apply(assay(trajGSM2), 1, scale)) + t(apply(assay(trajMM2), 1, scale))
> combinedMat <- plotTrajectoryHeatmap(trajCombined, returnMat = TRUE, varCutOff = 0)
> rowOrder <- match(rownames(combinedMat), rownames(trajGSM2))
> ht1 <- plotTrajectoryHeatmap(trajGSM2, pal = paletteContinuous(set = "horizonExtra"), varCutOff = 0, rowOrder = rowOrder)
> ht2 <- plotTrajectoryHeatmap(trajMM2, pal = paletteContinuous(set = "solarExtra"), varCutOff = 0, rowOrder = rowOrder)
> ht1+ht2
> plotPDF(ht1+ht2, name = "B_cell_differentiation_3.pdf", ArchRProj = projHeme5, addDOC = FALSE, width = 6, height = 4)
> corGIM_MM <- correlateTrajectories(trajGIM, trajMM)
> corGIM_MM[[1]]$matchname1
[1] "NFIA" "GATA3" "NFKB2" "IRF7" "ETS1" "NFE2"
[7] "STAT2" "FOXO1" "FOS" "IRF8" "HLF" "MBD2"
[13] "TCF4" "TCF3" "NFIC" "KLF2" "CEBPA" "POU2F2"
[19] "RELB" "FOSB" "FOSL2" "BCL11A" "REL" "RUNX1"
[25] "ATF4" "FOXP1" "GATA2" "HLTF" "LEF1" "IRF2"
[31] "MEF2C" "IRF1" "EBF1" "TFEB" "PAX5"
> corGIM_MM[[1]]
DataFrame with 35 rows and 12 columns
idx1 idx2 matchname1 matchname2 name1
<integer> <integer> <array> <array> <character>
1 680 1612 NFIA NFIA chr1:NFIA
2 1936 1254 GATA3 GATA3 chr10:GATA3
3 2428 1584 NFKB2 NFKB2 chr10:NFKB2
4 2625 1505 IRF7 IRF7 chr11:IRF7
5 3790 1202 ETS1 ETS1 chr11:ETS1
... ... ... ... ... ...
31 14165 1510 MEF2C MEF2C chr5:MEF2C
32 14287 1499 IRF1 IRF1 chr5:IRF1
33 14558 937 EBF1 EBF1 chr5:EBF1
34 15164 902 TFEB TFEB chr6:TFEB
35 17354 1579 PAX5 PAX5 chr9:PAX5
name2 Correlation VarAssay1
<character> <numeric> <numeric>
1 z:NFIA_742 0.849724184749172 0.87866243750336
2 z:GATA3_384 0.93659867389715 0.808988764044944
3 z:NFKB2_714 0.859622456404564 0.951723025643783
4 z:IRF7_635 0.764746603642242 0.906832965969572
5 z:ETS1_332 0.712968832845789 0.972582119240901
... ... ... ...
31 z:MEF2C_640 0.596546194551142 0.990323100908553
32 z:IRF1_629 0.847449427802714 0.946131928390947
33 z:EBF1_67 0.935345985945295 0.978119455943229
34 z:TFEB_32 0.714302160261238 0.907908176979732
35 z:PAX5_709 0.749530220292627 0.978280737594753
VarAssay2 TStat Pval
<numeric> <numeric> <numeric>
1 0.963793103448276 15.9548594705724 5.37145525952811e-29
2 0.987931034482759 26.4604722221253 1.99903508435256e-46
3 0.894827586206897 16.6555730597593 2.44787247491045e-30
4 0.902873563218391 11.7495562928653 2.04816496580159e-20
5 0.900574712643678 10.0657079170526 8.71545807437529e-17
... ... ... ...
31 0.899425287356322 7.35816402847822 5.77863043081892e-11
32 0.982183908045977 15.8027849003002 1.05864582937014e-28
33 0.993103448275862 26.1763006397724 5.05800469190381e-46
34 0.822988505747126 10.1041046801401 7.19422460446705e-17
35 0.966091954022989 11.2089206437606 2.94623276852313e-19
FDR
<numeric>
1 1.84221021123075e-27
2 2.64443784015781e-44
3 9.85534744246555e-29
4 3.87061379251485e-19
5 1.0760685569162e-15
... ...
31 3.87754476734661e-10
32 3.49480307537261e-27
33 5.2041248274477e-44
34 9.00250268072498e-16
35 5.05224359935633e-18
> trajGIM2 <- trajGIM[corGIM_MM[[1]]$name1, ]
> trajMM2 <- trajMM[corGIM_MM[[1]]$name2, ]
> trajCombined <- trajGIM2
> assay(trajCombined) <- t(apply(assay(trajGIM2), 1, scale)) + t(apply(assay(trajMM2), 1, scale))
> combinedMat <- plotTrajectoryHeatmap(trajCombined, returnMat = TRUE, varCutOff = 0)
> rowOrder <- match(rownames(combinedMat), rownames(trajGIM2))
> ht1 <- plotTrajectoryHeatmap(trajGIM2, pal = paletteContinuous(set = "blueYellow"), varCutOff = 0, rowOrder = rowOrder)
> ht2 <- plotTrajectoryHeatmap(trajMM2, pal = paletteContinuous(set = "solarExtra"), varCutOff = 0, rowOrder = rowOrder)
> ht1+ht2
> plotPDF(ht1+ht2, name = "B_cell_differentiation_4.pdf", ArchRProj = projHeme5, addDOC = FALSE, width = 6, height = 4)