我这里的scRNA-seq前期分析是用scanpy做的,所以首先用scanpy把pySCENIC需要的表达矩阵(行为细胞、列为基因)导出为CSV文件。
import pandas as pd
import scanpy as sc
import anndata as ad
# Load the annotated myeloid-cell AnnData object from disk.
myeloid_cells_har = ad.read_h5ad('myeloid_annotation.h5ad')
# Use the 'log1p' layer as the expression matrix. A layer may be stored as a
# scipy sparse matrix or as a dense ndarray; densify only when the object
# offers .toarray(), so a dense layer no longer raises AttributeError.
expr_matrix = myeloid_cells_har.layers['log1p']
if hasattr(expr_matrix, 'toarray'):
    expr_matrix = expr_matrix.toarray()
# Label rows with obs_names and columns with var_names
# (presumably cells x genes, the orientation pySCENIC reads from CSV - confirm).
df_expr_matrix = pd.DataFrame(
    data=expr_matrix,
    index=myeloid_cells_har.obs_names,
    columns=myeloid_cells_har.var_names,
)
# Written to the current working directory.
df_expr_matrix.to_csv('myeloid_cells_har_expression_matrix.csv')
接下来用pySCENIC进行转录因子分析,需要依次运行grn、ctx、aucell三步:后一步依赖前一步的输出文件(adj.sample.tsv、reg.csv),必须等前一步完成后再运行。
# Directory holding the pySCENIC reference files (placeholder path - edit it).
dir=/path/to/pyscenic/
# Transcription-factor list file.
tfs="$dir/allTFs_hg38.txt"
# Gene-vs-motif rankings database (feather format).
feather="$dir/hg38_10kbp_up_10kbp_down_full_tx_v10_clust.genes_vs_motifs.rankings.feather"
# Motif annotation table.
tbl="$dir/motifs-v10nr_clust-nr.hgnc-m0.001-o0.0.tbl"
# Run the three pySCENIC steps one after another inside a single background
# job. Each step reads the previous step's output file
# (grn -> adj.sample.tsv -> ctx -> reg.csv -> aucell), so starting them as
# three independent background jobs would launch ctx and aucell before their
# input files exist.
(
    # Ignore SIGHUP in the subshell itself so the remaining steps are still
    # started after the terminal is closed. nohup is kept on every command so
    # its output handling (nohup.out when run from a terminal) is unchanged.
    trap '' HUP
    # Stop the pipeline at the first failing step instead of running the next
    # step on a missing or incomplete input file.
    set -e

    # Step 1: build the adjacency table adj.sample.tsv with the grnboost2
    # method, from the expression matrix and the TF list.
    nohup pyscenic grn \
        --num_workers 15 \
        --output adj.sample.tsv \
        --method grnboost2 \
        myeloid_cells_har_expression_matrix.csv \
        "$tfs"

    # Step 2: combine the adjacencies with the rankings database and the
    # motif annotation table; the result is written to reg.csv.
    nohup pyscenic ctx \
        adj.sample.tsv "$feather" \
        --annotations_fname "$tbl" \
        --expression_mtx_fname myeloid_cells_har_expression_matrix.csv \
        --mode "dask_multiprocessing" \
        --output reg.csv \
        --num_workers 15 \
        --mask_dropouts

    # Step 3: run AUCell on the expression matrix with reg.csv; the result is
    # written to out_SCENIC.csv.
    nohup pyscenic aucell \
        myeloid_cells_har_expression_matrix.csv \
        reg.csv \
        --output out_SCENIC.csv \
        --num_workers 10
) &