Skip to content

KevinZ的小窝

Menu
  • Home
  • Categories
Menu

manta+strelka2/varscan2 call somatic mutation

Posted on 2025年 2月 20日2025年 7月 10日 by KevinZhou

manta+strelka2 call somatic mutation

bgzip -f AgilentV6_GRCh38_ex_region.sort.filtered.bed 
tabix -p bed AgilentV6_GRCh38_ex_region.sort.filtered.bed.gz

mkdir {manta,strelka2}

conda activate strelka2

cd manta
perl -ne 'chomp; next if /^$/; @a = split /\t/; print "configManta.py --tumorBam ../align/$a[0]_bqsr.bam --normalBam ../align/$a[1]_bqsr.bam --referenceFasta /data02/zhangmengmeng/database/hg38/gencode_GRCh38.p14.genome.fa --callRegions /data02/zhangmengmeng/database/gatk_resource_bundle/hg38/AgilentV6_GRCh38_ex_region.sort.filtered.bed.gz --generateEvidenceBam --runDir /data02/zhangmengmeng/BPT/LCM-WES/manta/$a[0] --exome --outputContig \n"' sample_pair.txt > CreateRunManta.sh
./CreateRunManta.sh |grep "runWorkflow.py" > RunManta.sh
nohup ./RunManta.sh > RunManta.log 2>&1 &
cat RunManta.log |grep "Manta workflow successfully completed"

# 注意strelka2的conda版本似乎有问题,需要从github下载后解压使用
cd ../strelka2
perl -ne 'chomp; next if /^$/; @a = split /\t/; print "/data02/zhangmengmeng/software/strelka-2.9.10.centos6_x86_64/bin/configureStrelkaSomaticWorkflow.py --tumorBam ../align/$a[0]_bqsr.bam --normalBam ../align/$a[1]_bqsr.bam --referenceFasta /data02/zhangmengmeng/database/hg38/gencode_GRCh38.p14.genome.fa --callRegions /data02/zhangmengmeng/database/gatk_resource_bundle/hg38/AgilentV6_GRCh38_ex_region.sort.filtered.bed.gz --indelCandidates /data02/zhangmengmeng/BPT/LCM-WES/manta/$a[0]/results/variants/candidateSmallIndels.vcf.gz --runDir /data02/zhangmengmeng/BPT/LCM-WES/strelka2/$a[0] --exome \n"' sample_pair.txt > CreateRunStrelka2.sh
./CreateRunStrelka2.sh |grep "runWorkflow.py" > RunStrelka2.sh
sed -i 's#$# -m local -j 4#' RunStrelka2.sh
cat RunStrelka2.log |grep "Strelka somatic workflow successfully completed"

运行完后处理:

1.将所有vcf文件copy到新的文件夹
# cp_results.sh
mkdir -p snp indel

# Move and rename somatic.snvs.vcf.gz
find . -path "*/results/variants/somatic.snvs.vcf.gz" | while read file; do
    new_name=$(echo "$file" | awk -F'/' '{print $(NF-3) ".somatic.snvs.vcf.gz"}')
    cp "$file" "snp/$new_name"
done

# Move and rename somatic.indels.vcf.gz
find . -path "*/results/variants/somatic.indels.vcf.gz" | while read file; do
    new_name=$(echo "$file" | awk -F'/' '{print $(NF-3) ".somatic.indels.vcf.gz"}')
    cp "$file" "indel/$new_name"
done
2.解压并筛选pass
cd snp
gunzip ./*.gz

cd indel
gunzip ./*.gz

cd ../
# filter_pass.sh:
# Process VCF files in the snp directory
for file in snp/*.vcf; do
    grep '^#' "$file" > "${file%.vcf}_filtered.vcf"  # Extract headers
    awk '$7 == "PASS"' "$file" >> "${file%.vcf}_filtered.vcf"  # Append PASS rows
done

# Process VCF files in the indel directory
for file in indel/*.vcf; do
    grep '^#' "$file" > "${file%.vcf}_filtered.vcf"  # Extract headers
    awk '$7 == "PASS"' "$file" >> "${file%.vcf}_filtered.vcf"  # Append PASS rows
done

varscan2 call somatic mutation

cd align
ls *_bqsr.bam|perl -ne 'chomp;if($_=~/(\S+)\_bqsr/){print "samtools view -b -u -q 20 $1_bqsr.bam | samtools mpileup -f /data02/zhangmengmeng/database/hg38/gencode_GRCh38.p14.genome.fa - \> $1.mpileup && echo $1 ok\n"}' > samtools_mpileup.sh

cd varscan2
# 同时运行varscan somatic + varscan processSomatic:
perl -ne 'chomp; next if /^$/; @a = split /\t/; print "varscan somatic ../../align/$a[1].mpileup ../../align/$a[0].mpileup $a[0] --strand-filter 1 --min-coverage 100 --min-reads 3 --min-var-freq 0.05 --p-value 0.05 --output-vcf 1 && varscan processSomatic $a[0].snp.vcf --min-tumor-freq 0.05 --max-normal-freq 0.05 --p-value 0.05 && varscan processSomatic $a[0].indel.vcf --min-tumor-freq 0.05 --max-normal-freq 0.05 --p-value 0.05 && echo $a[0] varscan2 ok\n"' ../../somatic/sample_pair.txt > varscan2.sh

# 单独运行varscan2后processSomatic:
perl -ne 'chomp; next if /^$/; @a = split /\t/; print "varscan somatic ../../align/$a[1].mpileup ../../align/$a[0].mpileup $a[0] --strand-filter 1 --min-coverage 250 --min-reads 3 --min-var-freq 0.05 --p-value 0.05 --output-vcf 1 && echo $a[0] varscan2 ok\n"' ../../somatic/sample_pair.txt > varscan2.sh
perl -ne 'chomp; next if /^$/; @a = split /\t/; print "varscan processSomatic $a[0].snp.vcf --min-tumor-freq 0.05 --max-normal-freq 0.05 --p-value 0.05 && varscan processSomatic $a[0].indel.vcf --min-tumor-freq 0.05 --max-normal-freq 0.05 --p-value 0.05 && echo $a[0] varscan2 processSomatic ok\n"' ../../somatic/sample_pair.txt > varscan2_processSomatic.sh

# varscan结束后运行vcf2maf:
conda activate vcf2maf
perl -ne 'chomp;next if($_=~/^$/);my @a=split /\t/;print "vcf2maf.pl --input-vcf $a[0].snp.Somatic.hc.vcf --output-maf $a[0].snp.Somatic.hc.maf --normal-id $a[1] --tumor-id $a[0] --vcf-tumor-id TUMOR --vcf-normal-id NORMAL --vep-path /home/zhoukaiwen/software/anaconda3/envs/vcf2maf/bin --vep-data /home/zhoukaiwen/database/vep/ --ref-fasta /home/zhoukaiwen/database/GRCh38/genecode_GRCh38.p14.genome.fa --verbose --ncbi-build GRCh38 --cache-version 114 && vcf2maf.pl --input-vcf $a[0].snp.Germline.hc.vcf --output-maf $a[0].snp.Germline.hc.maf --normal-id $a[1] --tumor-id $a[0] --vcf-tumor-id TUMOR --vcf-normal-id NORMAL --vep-path /home/zhoukaiwen/software/anaconda3/envs/vcf2maf/bin --vep-data /home/zhoukaiwen/database/vep/ --ref-fasta /home/zhoukaiwen/database/GRCh38/genecode_GRCh38.p14.genome.fa --verbose --ncbi-build GRCh38 --cache-version 114 && vcf2maf.pl --input-vcf $a[0].snp.LOH.hc.vcf --output-maf $a[0].snp.LOH.hc.maf --normal-id $a[1] --tumor-id $a[0] --vcf-tumor-id TUMOR --vcf-normal-id NORMAL --vep-path /home/zhoukaiwen/software/anaconda3/envs/vcf2maf/bin --vep-data /home/zhoukaiwen/database/vep/ --ref-fasta /home/zhoukaiwen/database/GRCh38/genecode_GRCh38.p14.genome.fa --verbose --ncbi-build GRCh38 --cache-version 114 && vcf2maf.pl --input-vcf $a[0].indel.Somatic.hc.vcf --output-maf $a[0].indel.Somatic.hc.maf --normal-id $a[1] --tumor-id $a[0] --vcf-tumor-id TUMOR --vcf-normal-id NORMAL --vep-path /home/zhoukaiwen/software/anaconda3/envs/vcf2maf/bin --vep-data /home/zhoukaiwen/database/vep/ --ref-fasta /home/zhoukaiwen/database/GRCh38/genecode_GRCh38.p14.genome.fa --verbose --ncbi-build GRCh38 --cache-version 114 && vcf2maf.pl --input-vcf $a[0].indel.Germline.hc.vcf --output-maf $a[0].indel.Germline.hc.maf --normal-id $a[1] --tumor-id $a[0] --vcf-tumor-id TUMOR --vcf-normal-id NORMAL --vep-path /home/zhoukaiwen/software/anaconda3/envs/vcf2maf/bin --vep-data /home/zhoukaiwen/database/vep/ --ref-fasta /home/zhoukaiwen/database/GRCh38/genecode_GRCh38.p14.genome.fa --verbose --ncbi-build GRCh38 --cache-version 114 && vcf2maf.pl --input-vcf $a[0].indel.LOH.hc.vcf --output-maf $a[0].indel.LOH.hc.maf --normal-id $a[1] --tumor-id $a[0] --vcf-tumor-id TUMOR --vcf-normal-id NORMAL --vep-path /home/zhoukaiwen/software/anaconda3/envs/vcf2maf/bin --vep-data /home/zhoukaiwen/database/vep/ --ref-fasta /home/zhoukaiwen/database/GRCh38/genecode_GRCh38.p14.genome.fa --verbose --ncbi-build GRCh38 --cache-version 114 && echo $a[0] vcf2maf ok\n"' ../../somatic/sample_pair.txt >vcf2maf.sh

# 批量运行Annovar
ls *.Somatic.hc.vcf | perl -ne 'chomp; my $name = $1 if ($_ =~ /([^\/]+)\.Somatic\.hc\.vcf/); print "table_annovar.pl $name.Somatic.hc.vcf /home/zhoukaiwen/database/Annovar/humandb_hg38 -buildver hg38 -out $name.Somatic.hc -remove -protocol refGene,cytoBand,avsnp151,exac03,gnomad41_exome,EAS.sites.2015_08,cosmic102_unique -operation g,r,f,f,f,f,f -nastring . -vcfinput \n"'>annovar.sh

# 把Annovar注释后vcf转换成maf
perl -ne 'chomp;next if($_=~/^$/);my @a=split /\t/;print "vcf2maf.pl --input-vcf $a[0].snp.Somatic.hc.hg38_multianno.vcf --output-maf $a[0].snp.Somatic.hc.hg38_multianno.maf --normal-id $a[1] --tumor-id $a[0] --vcf-tumor-id TUMOR --vcf-normal-id NORMAL --vep-path /home/zhoukaiwen/software/anaconda3/envs/vcf2maf/bin --vep-data /home/zhoukaiwen/database/vep/ --ref-fasta /home/zhoukaiwen/database/GRCh38/genecode_GRCh38.p14.genome.fa --verbose --ncbi-build GRCh38 --cache-version 114 && cat $a[0].snp.Somatic.hc.hg38_multianno.maf|grep -E \"version|FILTER|PASS\" > $a[0].snp_pass.Somatic.hc.hg38_multianno.maf && vcf2maf.pl --input-vcf $a[0].indel.Somatic.hc.hg38_multianno.vcf --output-maf $a[0].indel.Somatic.hc.hg38_multianno.maf --normal-id $a[1] --tumor-id $a[0] --vcf-tumor-id TUMOR --vcf-normal-id NORMAL --vep-path /home/zhoukaiwen/software/anaconda3/envs/vcf2maf/bin --vep-data /home/zhoukaiwen/database/vep/ --ref-fasta /home/zhoukaiwen/database/GRCh38/genecode_GRCh38.p14.genome.fa --verbose --ncbi-build GRCh38 --cache-version 114 && cat $a[0].indel.Somatic.hc.hg38_multianno.maf|grep -E \"version|FILTER|PASS\" > $a[0].indel_pass.Somatic.hc.hg38_multianno.maf  && echo $a[0] vcf2maf ok\n"' ../../somatic/sample_pair.txt >vcf2maf2.sh
2025 年 2 月
一 二 三 四 五 六 日
 12
3456789
10111213141516
17181920212223
2425262728  
« 1 月   3 月 »

俺家的猫~

胖达~

© 2025 KevinZ的小窝 |

粤ICP备2023017690号

|

粤公网安备 44010402003004号