SELECT——突变/CNV相关性算法

注意：1.6 版本的 SELECT 需要先修改源代码才能运行——在 `select_APC_threshold.R` 中，于 `get_thresh.2 <- function(x, A)` 函数体的第一行添加 `x <- as.data.frame(x)`，否则运行时会报错。

SELECT 原文链接 2017 Cancer Cell

使用代码

## 1. Read the MAF file ####
# Load the somatic mutation calls from disk with read.maf(); the returned
# object is what maf2gam() consumes below.
IBC_nonIBC_maf <- read.maf(
  maf = "/home/zhoukaiwen/IBC/WES/somatic/all.maf"
)

## 2. Build the GAM (sample x gene mutation matrix) ####
# Convert a MAF object into a sample-by-gene mutation table.
#
# Args:
#   maf_file: an object with a `@data` slot holding (at least) the columns
#     `Tumor_Sample_Barcode` and `Hugo_Symbol`, one row per mutation call.
#
# Returns:
#   A data.frame with one row per unique sample barcode and one column per
#   unique gene symbol (both in order of first appearance). Cells hold the
#   character strings "TRUE" (at least one mutation call for that
#   sample/gene pair) or "FALSE" (none) -- strings, not logicals, which is
#   what downstream code in this script receives via as.matrix().
#   An empty MAF yields a 0 x 0 data.frame instead of an error.
maf2gam <- function(maf_file) {
  # as.character() so factor barcodes/symbols are matched by label, not code.
  sample_of_call <- as.character(maf_file@data$Tumor_Sample_Barcode)
  gene_of_call <- as.character(maf_file@data$Hugo_Symbol)

  sample_ids <- unique(sample_of_call)
  gene_ids <- unique(gene_of_call)

  gam <- matrix(
    "FALSE",
    nrow = length(sample_ids),
    ncol = length(gene_ids),
    dimnames = list(sample_ids, gene_ids)
  )

  if (length(sample_of_call) > 0) {
    # Two-column index matrix: flag every (sample, gene) pair in one
    # vectorised assignment instead of looping over the mutation calls.
    hit <- cbind(
      match(sample_of_call, sample_ids),
      match(gene_of_call, gene_ids)
    )
    gam[hit] <- "TRUE"
  }

  # as.data.frame() on a matrix keeps the dimnames verbatim, so gene symbols
  # such as "HLA-A" are not mangled into syntactic names.
  as.data.frame(gam, stringsAsFactors = FALSE)
}

# Sample x gene mutation table for the whole cohort: rows are sample
# barcodes, columns are gene symbols (see maf2gam above).
IBC_nonIBC_maf_df <- maf2gam(IBC_nonIBC_maf)

## 3. Build the sample classification data frame ####
# One row per sample, named and ordered like the rows of the mutation table.
# The row count is taken from the table itself rather than hard-coded, so the
# script keeps working when the cohort size changes.
# stringsAsFactors = FALSE keeps both columns as character so the relabelling
# below cannot trip over missing factor levels on R < 4.0.
sample_class <- data.frame(
  TumorType = rep("IBC", nrow(IBC_nonIBC_maf_df)),
  SampleType = rep("Tumor", nrow(IBC_nonIBC_maf_df)),
  stringsAsFactors = FALSE
)
rownames(sample_class) <- rownames(IBC_nonIBC_maf_df)

# Every sample starts as "IBC" / "Tumor"; relabel from the barcode text:
# barcodes containing "nonIBC" are non-IBC, barcodes containing "-S" are skin.
sample_class$TumorType[grepl("nonIBC", rownames(sample_class), fixed = TRUE)] <- "nonIBC"
sample_class$SampleType[grepl("-S", rownames(sample_class), fixed = TRUE)] <- "Skin"

## 4. Build the alteration-type data frame ####
# One row per column (gene) of the mutation table, with the gene name as the
# row name and a single column `alteration_type` that is "Mutation" for
# every entry.
# NOTE(review): the section 5.3 comment in this script lists
# MUTATION/CNA_AMP/CNA_DEL as the type labels, while the value used here is
# "Mutation" -- confirm which spelling SELECT expects.
alteration_class <- data.frame(
  alteration_type = rep("Mutation", ncol(IBC_nonIBC_maf_df))
)
rownames(alteration_class) <- colnames(IBC_nonIBC_maf_df)

# 5. Build the AL object by hand for the analysis ####
## 5.1 Input 1: mutation matrix -- row names are sample names, column names
## are gene names, TRUE = mutated, FALSE = not mutated.
IBC_al <- new.AL(as.matrix(IBC_nonIBC_maf_df))

## 5.2 Input 2: sample grouping table -- row names are sample names, columns
## are grouping variables; several grouping schemes can be stored side by
## side under al$samples.
# Every annotation is looked up AND named by the row order of IBC_al$am, so
# a value can never end up under another sample's name even if sample_class
# is ordered differently from the matrix.
am_samples <- rownames(IBC_al$am)
stopifnot(all(am_samples %in% rownames(sample_class)))
IBC_al$samples$sample_id <- am_samples
IBC_al$samples$tumor_type <- setNames(
  sample_class[am_samples, "TumorType"],
  am_samples
)
IBC_al$samples$sample_type <- setNames(
  sample_class[am_samples, "SampleType"],
  am_samples
)

## 5.3 Input 3: alteration-type table -- row names are gene names, the column
## is the genomic alteration type (MUTATION/CNA_AMP/CNA_DEL).
# Same alignment rule as above, keyed by the column order of IBC_al$am. The
# column is selected by name instead of relying on the implicit drop of a
# one-column data frame.
am_alterations <- colnames(IBC_al$am)
stopifnot(all(am_alterations %in% rownames(alteration_class)))
IBC_al$alterations$alteration_id <- am_alterations
IBC_al$alterations$alteration_type <- setNames(
  alteration_class[am_alterations, "alteration_type"],
  am_alterations
)

# View() needs an interactive session with a data viewer; skip it when the
# script is run in batch mode.
if (interactive()) {
  View(IBC_al)
}

## 6. Run SELECT ####
# Inputs: the alteration matrix, the per-sample tumor-type labels and the
# per-alteration type labels assembled in section 5. `folder` and `n.cores`
# are passed through to select() unchanged.
IBC_alpi <- select(
  M = IBC_al$am,
  sample.class = IBC_al$samples$tumor_type,
  alteration.class = IBC_al$alterations$alteration_type,
  folder = "select_pancan23_IBC/",
  n.cores = 4
)

一	二	三	四	五	六	日
	1	2	3	4	5	6
7	8	9	10	11	12	13
14	15	16	17	18	19	20
21	22	23	24	25	26	27
28	29	30	31