1. Call Somatic Mutations
首先通过Mutect2、Varscan2、Strelka2等获取样本突变maf文件,过滤后合并,获取最终maf文件,
# R
# 需要修改过滤后的maf文件的Tumosamplebarcode,把用于构建进化的样本改成一样的Tumosamplebarcode
Merged_maf_df <- read.table("/Users/zhoukaiwen/Desktop/Breast_Phyllodes_Tumor/Bioinfo/WES/Mutation/6patients_mutect2_varscan2_intersect_pass.Somatic.hc.hg38_multianno_filtered.maf",
header = T,sep = '\t',quote = "")
table(Merged_maf_df$Tumor_Sample_Barcode)
# 直接删除-BNPT和-FA,获取单独以-M和-E区分的Tumor_Sample_Barcode
Merged_maf_df$Tumor_Sample_Barcode <- gsub("-BNPT","",Merged_maf_df$Tumor_Sample_Barcode)
Merged_maf_df$Tumor_Sample_Barcode <- gsub("-FA","",Merged_maf_df$Tumor_Sample_Barcode)
table(Merged_maf_df$Tumor_Sample_Barcode)
write.table(Merged_maf_df,"/Users/zhoukaiwen/Desktop/Breast_Phyllodes_Tumor/Bioinfo/WES/Mutation/6patients_mutect2_varscan2_intersect_pass_Tumor_Sample_Barcode_merged.Somatic.hc.hg38_multianno_filtered.maf",col.names = T,row.names = F,sep = '\t',quote = F)
2. 将修整过的maf文件转换为vcf文件(根据T-N进行拆分)
maf2vcf.pl --input-maf 6patients_mutect2_varscan2_intersect_pass_Tumor_Sample_Barcode_merged.Somatic.hc.hg38_multianno_filtered.maf --output-dir /groups/phyllodes/home/share/Results/WES/maf2vcf --output-vcf 6patients_mutect2_varscan2_intersect_pass_Tumor_Sample_Barcode_merged --ref-fasta /home/zhoukaiwen/database/GRCh38/genecode_GRCh38.p14.genome.fa --per-tn-vcfs
3. 使用gatk ASEReadCounter提取allele specific位点信息(不准)
注意此处的sample_pair.txt要加多第四列为分组信息
perl -ne 'chomp; next if /^$/; @a = split /\t/; print "bcftools sort $a[3]\_vs\_$a[1].vcf -o $a[3]\_vs\_$a[1].sorted.vcf && gatk IndexFeatureFile -I $a[3]\_vs\_$a[1].sorted.vcf && gatk ASEReadCounter -R ~/database/GRCh38/genecode_GRCh38.p14.genome.fa -I ../align/$a[0]_bqsr.bam -V $a[3]\_vs\_$a[1].sorted.vcf -L ~/database/AgilentV6_GRCh38_ex_region.sort.filtered.bed --min-base-quality 10 --min-mapping-quality 20 -O $a[0].shared.sites.vcf && echo $a[0] ASEReadCounter ok\n"' sample_pair.txt > ASEReadCounter.sh
3. 重跑gatk Mutect2,设定force call,获取指定位点的信息
注意--allele和-L参数都要指定为上一步构建的vcf文件,才能指定位点force-call
注意此处的sample_pair.txt要加多第四列为分组信息
perl -ne 'chomp; next if /^$/; @a = split /\t/; print "bcftools sort $a[3]\_vs\_$a[1].vcf -o $a[3]\_vs\_$a[1].sorted.vcf && gatk IndexFeatureFile -I $a[3]\_vs\_$a[1].sorted.vcf && gatk Mutect2 -R ~/database/GRCh38/genecode_GRCh38.p14.genome.fa -I ../align/$a[0]_bqsr.bam -I ../align/$a[1]_bqsr.bam -tumor $a[0] -normal $a[1] --alleles $a[3]\_vs\_$a[1].sorted.vcf -L $a[3]\_vs\_$a[1].sorted.vcf --germline-resource ~/database/gatk_bundle/hg38/somatic-hg38_af-only-gnomad.hg38.vcf.gz -pon ~/database/gatk_bundle/hg38/somatic-hg38_1000g_pon.hg38.vcf.gz --f1r2-tar-gz $a[0].shared.sites.f1r2.tar.gz -O $a[0].shared.sites.raw.vcf && echo $a[0] Mutect2 force call ok\n"' sample_pair.txt > Mutect2_force_call_short.sh
4. vcf2maf 转换为maf
perl -ne 'chomp;next if($_=~/^$/);my @a=split /\t/;print "vcf2maf.pl --input-vcf $a[0].shared.sites.raw.vcf --output-maf $a[0].shared.sites.maf --normal-id $a[1] --tumor-id $a[0] --vcf-tumor-id $a[0] --vcf-normal-id $a[1] --vep-path /home/zhoukaiwen/software/anaconda3/envs/vcf2maf/bin --vep-data /home/zhoukaiwen/database/vep/ --ref-fasta /home/zhoukaiwen/database/GRCh38/genecode_GRCh38.p14.genome.fa --verbose --ncbi-build GRCh38 --cache-version 114 && echo $a[0] vcf2maf ok\n"' sample_pair.txt >vcf2maf.sh
# 合并maf
cat *.shared.sites.maf | egrep "^#|^Hugo_Symbol" | head -2 > 6patients.shared.sites.maf
cat *.shared.sites.maf | egrep -v "^#|^Hugo_Symbol" >> 6patients.shared.sites.maf