安装 cnv_facets 并准备文件
conda create -n facets
conda activate facets
conda install bioconda::cnv_facets
# 下载snp文件:
# https://ftp.ncbi.nih.gov/snp/organisms/human_9606/VCF/00-common_all.vcf.gz
gunzip common_all.vcf.gz
# 查看染色体编号格式有无chr
awk '!/^#/ {print $1}' 00-common_all.vcf | sort -u
# 对除了有#的行添加chr:
sed '/^#/!s/^/chr/' 00-common_all.vcf > 00-common_all_chr.vcf
bgzip 00-common_all_chr.vcf
tabix 00-common_all_chr.vcf.gz
bgzip 00-common_all.vcf
运行facets
1. 使用方法一:BAM & VCF input (会画图,生成vcf文件,不好用)
perl -ne 'chomp; next if /^$/; @a = split /\t/; print "cnv_facets.R -t ../align/$a[0]_bqsr.bam -n ../align/$a[1]_bqsr.bam -vcf /data02/zhangmengmeng/database/dbSNP/00-common_all_chr.vcf.gz -o $a[0] && echo $a[0] facets ok\n"' sample_pair.txt > Runfacets.sh
2. 使用方法二:Pileup input (可以输出肿瘤倍性文件以及CN表格,好用)
This pileup file is generated by cnv_facets.R when run with bam input as in option 1. If you need to explore different parameter values for CNV detection, using a pre-made pileup file can save considerable computing time.
perl -ne 'chomp;next if($_=~/^$/);my @a=split /\t/;print "snp-pileup -A -p -v /data02/zhangmengmeng/database/dbSNP/00-common_all_chr.vcf.gz $a[0].txt ../align/$a[1]_bqsr.bam ../align/$a[0]_bqsr.bam && cp /data02/zhangmengmeng/software/facets_demo.R $a[0].R && sed -i \"s/example/$a[0]/\" $a[0].R && Rscript $a[0].R && echo $a[0] facets ok\n"' sample_pair.txt >CreateFacetsPileup.sh
ls *_cncf.xls|perl -ne 'chomp;if($_=~/(\S+)\_cncf.xls/){print "perl /data02/zhangmengmeng/software/annovar/facets_annovar.pl $_ >$1.cnv.annovar && perl /data02/zhangmengmeng/software/annovar/annotate_variation.pl -buildver hg38 $1.cnv.annovar /data02/zhangmengmeng/database/annovar_db/humandb_hg38 && perl /data02/zhangmengmeng/software/annovar/annotate_variation.pl -regionanno -build hg38 -out $1.cnv.annovar -dbtype cytoBand $1.cnv.annovar /data02/zhangmengmeng/database/annovar_db/humandb_hg38 & echo $1 facets annovar ok\n"}' > facets.annovar.sh
facets_demo.R
library(facets)
rcmat<-readSnpMatrix("example.txt", perl.pileup=F)
set.seed(1234)
xx=preProcSample(rcmat)
oo=procSample(xx)
fit=emcncf(oo)
info<-c(fit$purity, fit$ploidy)
write.table(info, file="example_info.xls", sep="\t",quote=T,col.names=T,row.names=T)
write.table(fit$cncf, file="example_cncf.xls", sep="\t",quote=T,col.names=T,row.names=T)
facets_annovar.pl
#!/usr/bin/perl
use strict;
use File::Basename qw(basename dirname);
my $file=shift;
open(FILE,$file)||die"can't open the cnv file\n";
while (<FILE>) {
chomp;
$_=~s/\"//g;
my @temp=split /\t/;
next if($_=~/NA/ || $temp[3]<100 || $temp[-2]==2);
if ($temp[-2]>2) {
print "chr$temp[1]\t$temp[-5]\t$temp[-4]\t0\t0\tGain\t$temp[-3]\t$temp[-2]\t$temp[-1]\n";
}elsif($temp[-2]<2){
print "chr$temp[1]\t$temp[-5]\t$temp[-4]\t0\t0\tLoss\t$temp[-3]\t$temp[-2]\t$temp[-1]\n";
}
}
close FILE;