pySCENIC的安装及运行 – KevinZ的小窝

1.安装pyscenic

# 参考：https://cloud.tencent.com/developer/article/2228252
# 需要一些依赖
conda create -n pyscenic python=3.7 
conda activate pyscenic 

mamba install -y numpy scanpy
mamba install -y -c anaconda cytoolz

pip install pyscenic

2. 数据库配置

https://resources.aertslab.org/cistarget/
下载：
1. databases/homo_sapiens/hg38/refseq_r80/mc_v10_clust/gene_based/hg38_10kbp_up_10kbp_down_full_tx_v10_clust.genes_vs_motifs.rankings.feather
2. motif2tf/motifs-v10nr_clust-nr.hgnc-m0.001-o0.0.tbl
3. tf_lists/allTFs_hg38.txt

3. R包安装

## SCENIC需要一些依赖包，先安装好
install.package("BiocManager")
BiocManager::install(c("AUCell", "RcisTarget","GENIE3","zoo", "mixtools", "rbokeh","DT", "NMF", "pheatmap", "R2HTML", "Rtsne","doMC", "doRNG","scRNAseq"))
devtools::install_github("aertslab/SCopeLoomR", build_vignettes = TRUE)
devtools::install_github("aertslab/SCENIC")
BiocManager::install("Seurat")
#check
library(SCENIC)
packageVersion("SCENIC")

4. 分步骤运行pySCENIC

4.1 Step1. R中导出表达矩阵

library(Seurat)
library(data.table)
Seurat_obj <- readRDS("/home/zhoukaiwen/Path/To/EXAMPLE.rds")
tmp_matrix <- as.data.frame(GetAssayData(Seurat_obj,slot="counts"))
fwrite(tmp_matrix,file = "EXAMPLE_input.csv", col.names=T, row.names=T, sep=',', quote=F)

4.2 Step2. Python中将表达矩阵导出为loom格式

import os, sys
os.getcwd()
os.listdir(os.getcwd()) 

import loompy as lp
import numpy as np
import scanpy as sc
x=sc.read_csv("EXAMPLE_input.csv")
row_attrs = {"Gene": np.array(x.obs_names),}
col_attrs = {"CellID": np.array(x.var_names)}
lp.create("EXAMPLE_input.loom",x.X,row_attrs,col_attrs)

4.3 Step3. 正式运行pySCENIC

# 不同物种的数据库不一样，这里是人类是human 
dir=/home/zhoukaiwen/database/cisTarget
tfs=$dir/hs_hgnc_tfs.txt
feather=$dir/hg19-tss-centered-10kb-7species.mc9nr.genes_vs_motifs.rankings.feather
tbl=$dir/motifs-v9-nr.hgnc-m0.001-o0.0.tbl 
# 一定要保证上面的数据库文件完整无误哦 
input_loom=EXAMPLE_input.loom
ls $tfs  $feather  $tbl  

pyscenic grn \
--num_workers 8 \
--output grn.tsv \
--method grnboost2 \
$input_loom  $tfs 

echo pyscenic step3.1 grn done

pyscenic ctx \
grn.tsv $feather \
--annotations_fname $tbl \
--expression_mtx_fname $input_loom  \
--mode "dask_multiprocessing" \
--output ctx.csv \
--num_workers 8  \
--mask_dropouts

echo pyscenic step3.2 ctx done

pyscenic aucell \
$input_loom \
ctx.csv \
--output EXAMPLE_output.loom \
--num_workers 10 

echo pyscenic step3.3 aucell done

4.4 R 中可视化

library(SCENIC)
packageVersion("SCENIC")  
library(SCopeLoomR)
library(Seurat)
library(SeuratWrappers)
scenicLoomPath='out_SCENIC.loom'
loom <- open_loom(scenicLoomPath)
# Read information from loom file: 
regulons_incidMat <- get_regulons(loom, column.attr.name="Regulons")
regulons <- regulonsToGeneLists(regulons_incidMat)
regulonAUC <- get_regulons_AUC(loom, column.attr.name="RegulonsAUC") 
sce=readRDS("PD1_cancer.rds")
sce

library(pheatmap) 
n=t(scale(t( getAUC(regulonAUC[,] )))) # 'scale'可以对log-ratio数值进行归一化
n[n>2]=2 
n[n< -2]= -2
n[1:4,1:4]
dim(n) 
ac=data.frame(group= as.character( Idents(sce)))
n[1:4,1:4]
n=n[,colnames(n) %in% colnames(sce)]
rownames(ac)=colnames(n) 
#cg=read.table('choose_tf.txt')[,1]
#cg
#cg_n=n[rownames(n) %in% cg,]
cg_n <- n
#p1<-pheatmap(cg_n,show_colnames =F,show_rownames = T,annotation_col=ac)
table(ac$group)

#p2<-pheatmap(cg_n,show_colnames =F,show_rownames = T,annotation_col=ac)
pheatmap(cg_n,show_colnames =F,show_rownames = T,
         annotation_col=ac,
         filename = 'heatmap_choose_regulon.png')
dev.off()

一	二	三	四	五	六	日
						1
2	3	4	5	6	7	8
9	10	11	12	13	14	15
16	17	18	19	20	21	22
23	24	25	26	27	28	29
30