CopyKAT 的 GitHub 仓库:https://github.com/navinlabcode/copykat
- CopyKAT 处理大样本量数据时非常慢,因此需要按样本拆分后逐个单独分析
1. 分样本准备表达矩阵和细胞亚型信息
library(Seurat)
library(copykat)
library(qs)
library(data.table)
# Load the merged, annotated Seurat object that contains every sample.
seu_obj <- qread("/groups/phyllodes/home/share/Results/transfer/16samples_Merged_AllCells_Annotated_Final.qs")

# Directory that receives the per-sample CopyKAT input files.
out_dir <- "/groups/phyllodes/home/share/Results/scRNA/copykat"

# For each sample (orig.ident) write two tab-separated files:
#   <id>.exp.rawdata.txt : raw count matrix, genes in rows, cells in columns
#   <id>.groupinfo.txt   : cell barcode + CellType_Minor label, no header
for (id in unique(seu_obj@meta.data$orig.ident)) {
  # Select the cells of this sample by name.
  sample_cells <- colnames(seu_obj)[which(seu_obj@meta.data$orig.ident == id)]
  tmp_seuobj <- subset(seu_obj, cells = sample_cells)

  # Dense raw-count matrix for this sample.
  exp.rawdata <- as.matrix(GetAssayData(tmp_seuobj, slot = "counts"))

  # fwrite() coerces a matrix to data.table and drops its row names, so
  # `row.names = TRUE` would write row indices instead of gene identifiers.
  # Move the gene identifiers into an explicit first column ("V1") instead.
  exp_table <- as.data.table(exp.rawdata, keep.rownames = "V1")
  fwrite(
    exp_table,
    file = file.path(out_dir, paste0(id, ".exp.rawdata.txt")),
    sep = "\t",
    quote = FALSE
  )

  # Cell subtype annotation, looked up by barcode so that it is aligned with
  # the columns of the expression matrix.
  groupinfo <- data.frame(
    v1 = colnames(exp.rawdata),
    v2 = as.character(tmp_seuobj@meta.data[colnames(exp.rawdata), "CellType_Minor"])
  )
  fwrite(
    groupinfo,
    file = file.path(out_dir, paste0(id, ".groupinfo.txt")),
    sep = "\t",
    quote = FALSE,
    col.names = FALSE,
    row.names = FALSE
  )
}
2. 运行 CopyKAT 的模板文件
文件名:RunCopyKAT_EXAMPLE.R(脚本中的 EXAMPLE 是样本名占位符,会在第 3 步被替换为实际样本名)
library(Seurat)
library(copykat)
library(qs)
library(data.table)
# Template script: every occurrence of the sample placeholder in this file is
# substituted with a real sample name before the script is run.
message("Running EXAMPLE...")

# Cell types used as the reference population; the barcodes of these cells
# are passed to copykat() through `norm.cell.names`.
reference_cells <- c("SELL+_CD4+_Tn","FOXP3+_Treg","CD4+_Tem","ANXA1+_CD4+_Tcm",
                     "CXCL13+_Tfh","CD8+_MAIT","GZMK+_CD8+_Tem","ZNF683+CXCR6-_CD8+_Trm",
                     "ZNF683+CXCR6+_CD8+_Trm","KLRG1+_CD8+_Temra/Teff","SELL+_CD8+_Tn/Tcm","RSAD2+GZMK+_CD8+_Tem",
                     "GZMK+PDCD1+_CD8+_Tex","Proliferating_CD8+_T", "CD56dimCD16hi_NKT","CD56brightCD16lo_NK",
                     "CD56dimCD16hi_NK","IL41+KIT+_NK","Bn","Bm",
                     "ASCs","Bgc","ASC-like","Unknown_B",
                     "Macrophages","CD1C+_cDC2","MKI67+_Proliferating_mono/macro","Monocytes","MastCells")

# Read the count table as a plain data.frame. A data.table cannot hold row
# names, so assigning rownames() to it and then dropping the ID column would
# leave copykat() without gene identifiers. Build a real matrix instead:
# first column = gene identifiers, remaining columns = cells.
exp_table <- fread("EXAMPLE.exp.rawdata.txt", header = TRUE, data.table = FALSE)
gene_ids <- exp_table[[1]]
exp.rawdata <- as.matrix(exp_table[, -1, drop = FALSE])
rownames(exp.rawdata) <- gene_ids

# Column V1 = cell barcode, V2 = cell type; keep only the reference cells.
group.info <- fread("EXAMPLE.groupinfo.txt", header = FALSE)
group.info <- subset(group.info, V2 %in% reference_cells)

# copykat() takes its on/off switches as the strings "TRUE"/"FALSE".
# `output.seg` was previously misspelled "FLASE"; segment output stays disabled.
copykat_obj <- copykat(rawmat = exp.rawdata,
                       id.type = "S",
                       ngene.chr = 5,
                       win.size = 25, KS.cut = 0.1,
                       sam.name = "EXAMPLE",
                       distance = "euclidean",
                       norm.cell.names = group.info$V1,
                       output.seg = "FALSE",
                       plot.genes = "TRUE",
                       genome = "hg20",
                       n.cores = 4)

# Persist the result object for downstream analysis.
qsave(copykat_obj, "EXAMPLE_copykat_res.qs")
3. 对每一个样本生成一个运行脚本
#!/bin/bash
# Generate one RunCopyKAT_<sample>.R script per sample by copying the template
# and replacing its EXAMPLE placeholder with the sample name.

# Directory containing the .exp.rawdata.txt files
sample_directory="."
# Path to the template R script
template_script="RunCopyKAT_EXAMPLE.R"

# Loop through the files with the .exp.rawdata.txt extension
for sample_file in "${sample_directory}"/FETB*.exp.rawdata.txt; do
    # When nothing matches, bash leaves the glob pattern as a literal string;
    # skip it instead of generating a script for a non-existent sample.
    [ -e "$sample_file" ] || continue
    # Extract the sample name from the filename (e.g., FETB01-MPT-01)
    sample_number=$(basename "$sample_file" .exp.rawdata.txt)
    # Copy the template script to a new file named after the sample
    cp "$template_script" "RunCopyKAT_${sample_number}.R"
    # Replace the placeholder EXAMPLE in the new R script with the sample name
    sed -i "s/EXAMPLE/${sample_number}/g" "RunCopyKAT_${sample_number}.R"
done
4. 批量运行 CopyKAT
# Build the batch runner from scratch (">" truncates, so re-running this step
# does not append duplicate entries) and give it an explicit interpreter line.
{
    echo '#!/bin/bash'
    for r_script in RunCopyKAT_FETB*.R; do
        echo "Rscript \"$r_script\""
    done
} > RunCopyKAT_Individual.sh && chmod +x RunCopyKAT_Individual.sh
# Run all samples sequentially in the background; stdout and stderr go to the log.
nohup ./RunCopyKAT_Individual.sh > RunCopyKAT_Individual.log 2>&1 &