1.安装pyscenic
# 参考:https://cloud.tencent.com/developer/article/2228252
# 需要一些依赖
conda create -n pyscenic python=3.7
conda activate pyscenic
mamba install -y numpy scanpy
mamba install -y -c anaconda cytoolz
pip install pyscenic
2. 数据库配置
https://resources.aertslab.org/cistarget/
下载:
1. databases/homo_sapiens/hg38/refseq_r80/mc_v10_clust/gene_based/hg38_10kbp_up_10kbp_down_full_tx_v10_clust.genes_vs_motifs.rankings.feather
2. motif2tf/motifs-v10nr_clust-nr.hgnc-m0.001-o0.0.tbl
3. tf_lists/allTFs_hg38.txt
3. R包安装
## SCENIC需要一些依赖包,先安装好
install.package("BiocManager")
BiocManager::install(c("AUCell", "RcisTarget","GENIE3","zoo", "mixtools", "rbokeh","DT", "NMF", "pheatmap", "R2HTML", "Rtsne","doMC", "doRNG","scRNAseq"))
devtools::install_github("aertslab/SCopeLoomR", build_vignettes = TRUE)
devtools::install_github("aertslab/SCENIC")
BiocManager::install("Seurat")
#check
library(SCENIC)
packageVersion("SCENIC")
4. 分步骤运行pySCENIC
4.1 Step1. R中导出表达矩阵
library(Seurat)
library(data.table)
Seurat_obj <- readRDS("/home/zhoukaiwen/Path/To/EXAMPLE.rds")
tmp_matrix <- as.data.frame(GetAssayData(Seurat_obj,slot="counts"))
fwrite(tmp_matrix,file = "EXAMPLE_input.csv", col.names=T, row.names=T, sep=',', quote=F)
4.2 Step2. Python中将表达矩阵导出为loom格式
import os, sys
os.getcwd()
os.listdir(os.getcwd())
import loompy as lp
import numpy as np
import scanpy as sc
x=sc.read_csv("EXAMPLE_input.csv")
row_attrs = {"Gene": np.array(x.obs_names),}
col_attrs = {"CellID": np.array(x.var_names)}
lp.create("EXAMPLE_input.loom",x.X,row_attrs,col_attrs)
4.3 Step3. 正式运行pySCENIC
# 不同物种的数据库不一样,这里是人类是human
dir=/home/zhoukaiwen/database/cisTarget
tfs=$dir/hs_hgnc_tfs.txt
feather=$dir/hg19-tss-centered-10kb-7species.mc9nr.genes_vs_motifs.rankings.feather
tbl=$dir/motifs-v9-nr.hgnc-m0.001-o0.0.tbl
# 一定要保证上面的数据库文件完整无误哦
input_loom=EXAMPLE_input.loom
ls $tfs $feather $tbl
pyscenic grn \
--num_workers 8 \
--output grn.tsv \
--method grnboost2 \
$input_loom $tfs
echo pyscenic step3.1 grn done
pyscenic ctx \
grn.tsv $feather \
--annotations_fname $tbl \
--expression_mtx_fname $input_loom \
--mode "dask_multiprocessing" \
--output ctx.csv \
--num_workers 8 \
--mask_dropouts
echo pyscenic step3.2 ctx done
pyscenic aucell \
$input_loom \
ctx.csv \
--output EXAMPLE_output.loom \
--num_workers 10
echo pyscenic step3.3 aucell done
4.4 R 中可视化
library(SCENIC)
packageVersion("SCENIC")
library(SCopeLoomR)
library(Seurat)
library(SeuratWrappers)
scenicLoomPath='out_SCENIC.loom'
loom <- open_loom(scenicLoomPath)
# Read information from loom file:
regulons_incidMat <- get_regulons(loom, column.attr.name="Regulons")
regulons <- regulonsToGeneLists(regulons_incidMat)
regulonAUC <- get_regulons_AUC(loom, column.attr.name="RegulonsAUC")
sce=readRDS("PD1_cancer.rds")
sce
library(pheatmap)
n=t(scale(t( getAUC(regulonAUC[,] )))) # 'scale'可以对log-ratio数值进行归一化
n[n>2]=2
n[n< -2]= -2
n[1:4,1:4]
dim(n)
ac=data.frame(group= as.character( Idents(sce)))
n[1:4,1:4]
n=n[,colnames(n) %in% colnames(sce)]
rownames(ac)=colnames(n)
#cg=read.table('choose_tf.txt')[,1]
#cg
#cg_n=n[rownames(n) %in% cg,]
cg_n <- n
#p1<-pheatmap(cg_n,show_colnames =F,show_rownames = T,annotation_col=ac)
table(ac$group)
#p2<-pheatmap(cg_n,show_colnames =F,show_rownames = T,annotation_col=ac)
pheatmap(cg_n,show_colnames =F,show_rownames = T,
annotation_col=ac,
filename = 'heatmap_choose_regulon.png')
dev.off()