使用CopyKAT进行单细胞恶性细胞鉴定

GitHub 仓库：https://github.com/navinlabcode/copykat

CopyKAT 在大样本数据上运行非常慢，因此需要先按样本拆分数据，再对每个样本单独运行分析。

1. 分样本准备表达矩阵和细胞亚型信息

library(Seurat)
library(copykat)
library(qs)
library(data.table)

# Load the merged, annotated Seurat object that holds all samples.
seu_obj <- qread("/groups/phyllodes/home/share/Results/transfer/16samples_Merged_AllCells_Annotated_Final.qs")

# Directory that receives the per-sample CopyKAT input files.
copykat_dir <- "/groups/phyllodes/home/share/Results/scRNA/copykat/"

# Write one count table and one cell-annotation table per sample
# (samples are identified by the `orig.ident` metadata column).
for (id in unique(seu_obj@meta.data$orig.ident)) {
  # Keep only the cells that belong to the current sample.
  tmp_seuobj <- subset(seu_obj, orig.ident == id)

  # Raw counts as a dense matrix; columns are cells (their names are reused
  # as cell identifiers in the annotation table below).
  exp.rawdata <- GetAssayData(tmp_seuobj, slot = "counts")
  exp.rawdata <- as.matrix(exp.rawdata)

  # fwrite() coerces a matrix to a data.table, and data.table has no row
  # names: depending on the data.table version, `row.names = TRUE` on a matrix
  # writes row numbers instead of the feature names. Move the row names into
  # an explicit first column ("V1", the name the downstream fread() call
  # expects) so the output does not depend on the installed version.
  exp.table <- as.data.table(exp.rawdata, keep.rownames = "V1")
  fwrite(exp.table,
         file = paste0(copykat_dir, id, ".exp.rawdata.txt"),
         sep = "\t",
         quote = FALSE)

  # Cell-subtype annotation: use the minor cell type as the identity class.
  Idents(tmp_seuobj) <- tmp_seuobj$CellType_Minor

  # Two-column table without header: cell name, cell subtype.
  groupinfo <- data.frame(v1 = colnames(exp.rawdata),
                          v2 = Idents(tmp_seuobj))
  fwrite(groupinfo,
         file = paste0(copykat_dir, id, ".groupinfo.txt"),
         sep = "\t",
         quote = FALSE,
         col.names = FALSE,
         row.names = FALSE)
}

2. 运行 CopyKAT 的模板文件

模板文件名：RunCopyKAT_EXAMPLE.R（文件中的 EXAMPLE 为占位符，第 3 步会将其替换为实际样本名）

# Template script: runs CopyKAT on one sample.
# NOTE: the sample placeholder token that appears in the message and file
# names below is substituted with the real sample ID when the per-sample
# scripts are generated, so those strings must stay exactly as written.
library(Seurat)
library(copykat)
library(qs)
library(data.table)
message("Running EXAMPLE...")

# Cell types used as the known-normal reference cells for CopyKAT.
reference_cells <- c("SELL+_CD4+_Tn","FOXP3+_Treg","CD4+_Tem","ANXA1+_CD4+_Tcm",
                     "CXCL13+_Tfh","CD8+_MAIT","GZMK+_CD8+_Tem","ZNF683+CXCR6-_CD8+_Trm",
                     "ZNF683+CXCR6+_CD8+_Trm","KLRG1+_CD8+_Temra/Teff","SELL+_CD8+_Tn/Tcm","RSAD2+GZMK+_CD8+_Tem",
                     "GZMK+PDCD1+_CD8+_Tex","Proliferating_CD8+_T", "CD56dimCD16hi_NKT","CD56brightCD16lo_NK",
                     "CD56dimCD16hi_NK","IL41+KIT+_NK","Bn","Bm",
                     "ASCs","Bgc","ASC-like","Unknown_B",
                     "Macrophages","CD1C+_cDC2","MKI67+_Proliferating_mono/macro","Monocytes","MastCells")

# Read the count table; the first column (V1) carries the row identifiers.
exp.rawdata <- fread("EXAMPLE.exp.rawdata.txt", header = TRUE)

# fread() returns a data.table, which has no real row names and whose `[`
# indexing differs from a matrix. Convert to a plain matrix, promoting V1 to
# the row names, before handing the data to copykat().
exp.rawdata <- as.matrix(exp.rawdata, rownames = "V1")

# Cell annotation table (no header): V1 = cell name, V2 = cell subtype.
# Keep only the cells whose subtype is in the reference list.
group.info <- fread("EXAMPLE.groupinfo.txt", header = FALSE, sep = "\t")
group.info <- subset(group.info, V2 %in% reference_cells)

# NOTE(review): "FLASE" is kept verbatim; it appears to mirror the spelling of
# copykat's own documented default for `output.seg` -- confirm against the
# installed copykat version before "correcting" it.
copykat_obj <- copykat(rawmat = exp.rawdata,
                       id.type = "S",
                       ngene.chr = 5,
                       win.size = 25, KS.cut = 0.1,
                       sam.name = "EXAMPLE",
                       distance = "euclidean",
                       norm.cell.names = group.info$V1,
                       output.seg = "FLASE",
                       plot.genes = "TRUE",
                       genome = "hg20",
                       n.cores = 4)

# Persist the full CopyKAT result object for downstream analysis.
qsave(copykat_obj, "EXAMPLE_copykat_res.qs")

3. 对每一个样本生成一个运行脚本

#!/bin/bash
# Generate one RunCopyKAT_<sample>.R script per sample by copying the template
# and replacing its EXAMPLE placeholder with the sample name.

# Directory containing your .exp.rawdata.txt files
sample_directory="."

# The path to your original R script
template_script="RunCopyKAT_EXAMPLE.R"

# Fail early with a clear message if the template is missing.
if [ ! -f "$template_script" ]; then
    echo "Template script not found: $template_script" >&2
    exit 1
fi

# Loop through the files with .exp.rawdata.txt extension
for sample_file in "${sample_directory}"/FETB*.exp.rawdata.txt; do
    # When nothing matches, the glob stays literal; skip it instead of
    # generating a script for a non-existent sample.
    [ -e "$sample_file" ] || continue

    # Extract the sample number from the filename (e.g., FETB01-MPT-01)
    sample_number=$(basename "$sample_file" .exp.rawdata.txt)

    # Copy the template script to a new file based on the sample number
    cp "$template_script" "RunCopyKAT_${sample_number}.R"

    # Escape characters that are special in a sed replacement (\, / and &)
    # so unusual sample names cannot corrupt the substitution.
    escaped_number=$(printf '%s' "$sample_number" | sed 's/[\/&]/\\&/g')

    # Replace the placeholder EXAMPLE in the new R script with the sample number
    sed -i "s/EXAMPLE/${escaped_number}/g" "RunCopyKAT_${sample_number}.R"
done

4. 批量运行 CopyKAT

# Build the driver script from scratch on every run: appending with ">>"
# would duplicate the Rscript lines (and re-run every sample) each time this
# step is repeated.
{
    echo '#!/bin/bash'
    for r_script in RunCopyKAT_FETB*.R; do
        # Skip the literal pattern when no per-sample script exists.
        [ -e "$r_script" ] || continue
        echo "Rscript \"$r_script\""
    done
} > RunCopyKAT_Individual.sh && chmod +x RunCopyKAT_Individual.sh

# Run all samples sequentially in the background; output goes to the log file.
nohup ./RunCopyKAT_Individual.sh > RunCopyKAT_Individual.log 2>&1 &

一	二	三	四	五	六	日
		1	2	3	4	5
6	7	8	9	10	11	12
13	14	15	16	17	18	19
20	21	22	23	24	25	26
27	28	29	30	31