Skip to content

KevinZ的小窝

Menu
  • Home
  • Categories
Menu

pyclone(老版)亚克隆鉴定

Posted on 2025年 3月 5日2025年 3月 5日 by KevinZhou

1.使用mutect按病人同时处理样本

gatk Mutect2 -R /data02/zhangmengmeng/database/hg38/gencode_GRCh38.p14.genome.fa -I ../align/FETB01-BNPT-E_bqsr.bam -I ../align/FETB01-BNPT-M_bqsr.bam -I ../align/FETB01-FA-E_bqsr.bam -I ../align/FETB01-FA-M_bqsr.bam -I ../align/FETB01-B_bqsr.bam -normal FETB01-B -L /data02/zhangmengmeng/database/gatk_resource_bundle/hg38/AgilentV6_GRCh38_ex_region.sort.filtered.bed --germline-resource /data02/zhangmengmeng/database/gatk_resource_bundle/hg38/somatic-hg38_af-only-gnomad.hg38.vcf.gz -pon /data02/zhangmengmeng/database/gatk_resource_bundle/hg38/somatic-hg38_1000g_pon.hg38.vcf.gz --f1r2-tar-gz FETB01.f1r2.tar.gz -O FETB01.raw.vcf && gatk LearnReadOrientationModel -I FETB01.f1r2.tar.gz -O FETB01.read-orientation-model.tar.gz && gatk GetPileupSummaries -I ../align/FETB01-BNPT-E_bqsr.bam -I ../align/FETB01-BNPT-M_bqsr.bam -I ../align/FETB01-FA-E_bqsr.bam -I ../align/FETB01-FA-M_bqsr.bam -L /data02/zhangmengmeng/database/gatk_resource_bundle/hg38/AgilentV6_GRCh38_ex_region.sort.filtered.bed -V /data02/zhangmengmeng/database/gatk_resource_bundle/hg38/somatic-hg38_af-only-gnomad.hg38.vcf.gz -O FETB01.pileups.table && gatk GetPileupSummaries -I ../align/FETB01-B_bqsr.bam -L /data02/zhangmengmeng/database/gatk_resource_bundle/hg38/AgilentV6_GRCh38_ex_region.sort.filtered.bed -V /data02/zhangmengmeng/database/gatk_resource_bundle/hg38/somatic-hg38_af-only-gnomad.hg38.vcf.gz -O FETB01-B.pileups.table && gatk CalculateContamination -I FETB01.pileups.table -matched FETB01-B.pileups.table -O FETB01.CalculateContamination.table && gatk FilterMutectCalls -R /data02/zhangmengmeng/database/hg38/gencode_GRCh38.p14.genome.fa -V FETB01.raw.vcf --contamination-table FETB01.CalculateContamination.table --ob-priors FETB01.read-orientation-model.tar.gz -O FETB01.somatic.vcf && gatk FilterAlignmentArtifacts -R /data02/zhangmengmeng/database/hg38/gencode_GRCh38.p14.genome.fa --bwa-mem-index-image /data02/zhangmengmeng/database/hg38/gencode_GRCh38.p14.genome.fa.img -V FETB01.somatic.vcf -O FETB01.somatic.FAA.vcf -I ../align/FETB01-BNPT-E_bqsr.bam -I ../align/FETB01-BNPT-M_bqsr.bam -I ../align/FETB01-FA-E_bqsr.bam -I ../align/FETB01-FA-M_bqsr.bam && awk '/^#/ {print $0; next} $7=="PASS"  {print $0}' FETB01.somatic.FAA.vcf > ./FETB01.somatic.passed.vcf && echo FETB01 somatic ok
gatk Mutect2 -R /data02/zhangmengmeng/database/hg38/gencode_GRCh38.p14.genome.fa -I ../align/FETB02-BNPT-E_bqsr.bam -I ../align/FETB02-BNPT-M_bqsr.bam -I ../align/FETB02-FA-E_bqsr.bam -I ../align/FETB02-FA-M_bqsr.bam -I ../align/FETB02-B_bqsr.bam -normal FETB02-B -L /data02/zhangmengmeng/database/gatk_resource_bundle/hg38/AgilentV6_GRCh38_ex_region.sort.filtered.bed --germline-resource /data02/zhangmengmeng/database/gatk_resource_bundle/hg38/somatic-hg38_af-only-gnomad.hg38.vcf.gz -pon /data02/zhangmengmeng/database/gatk_resource_bundle/hg38/somatic-hg38_1000g_pon.hg38.vcf.gz --f1r2-tar-gz FETB02.f1r2.tar.gz -O FETB02.raw.vcf && gatk LearnReadOrientationModel -I FETB02.f1r2.tar.gz -O FETB02.read-orientation-model.tar.gz && gatk GetPileupSummaries -I ../align/FETB02-BNPT-E_bqsr.bam -I ../align/FETB02-BNPT-M_bqsr.bam -I ../align/FETB02-FA-E_bqsr.bam -I ../align/FETB02-FA-M_bqsr.bam -L /data02/zhangmengmeng/database/gatk_resource_bundle/hg38/AgilentV6_GRCh38_ex_region.sort.filtered.bed -V /data02/zhangmengmeng/database/gatk_resource_bundle/hg38/somatic-hg38_af-only-gnomad.hg38.vcf.gz -O FETB02.pileups.table && gatk GetPileupSummaries -I ../align/FETB02-B_bqsr.bam -L /data02/zhangmengmeng/database/gatk_resource_bundle/hg38/AgilentV6_GRCh38_ex_region.sort.filtered.bed -V /data02/zhangmengmeng/database/gatk_resource_bundle/hg38/somatic-hg38_af-only-gnomad.hg38.vcf.gz -O FETB02-B.pileups.table && gatk CalculateContamination -I FETB02.pileups.table -matched FETB02-B.pileups.table -O FETB02.CalculateContamination.table && gatk FilterMutectCalls -R /data02/zhangmengmeng/database/hg38/gencode_GRCh38.p14.genome.fa -V FETB02.raw.vcf --contamination-table FETB02.CalculateContamination.table --ob-priors FETB02.read-orientation-model.tar.gz -O FETB02.somatic.vcf && gatk FilterAlignmentArtifacts -R /data02/zhangmengmeng/database/hg38/gencode_GRCh38.p14.genome.fa --bwa-mem-index-image /data02/zhangmengmeng/database/hg38/gencode_GRCh38.p14.genome.fa.img -V FETB02.somatic.vcf -O FETB02.somatic.FAA.vcf -I ../align/FETB02-BNPT-E_bqsr.bam -I ../align/FETB02-BNPT-M_bqsr.bam -I ../align/FETB02-FA-E_bqsr.bam -I ../align/FETB02-FA-M_bqsr.bam && awk '/^#/ {print $0; next} $7=="PASS"  {print $0}' FETB02.somatic.FAA.vcf > ./FETB02.somatic.passed.vcf && echo FETB02 somatic ok
gatk Mutect2 -R /data02/zhangmengmeng/database/hg38/gencode_GRCh38.p14.genome.fa -I ../align/FETB03-BNPT-E_bqsr.bam -I ../align/FETB03-BNPT-M_bqsr.bam -I ../align/FETB03-FA-E_bqsr.bam -I ../align/FETB03-FA-M_bqsr.bam -I ../align/FETB03-B_bqsr.bam -normal FETB03-B -L /data02/zhangmengmeng/database/gatk_resource_bundle/hg38/AgilentV6_GRCh38_ex_region.sort.filtered.bed --germline-resource /data02/zhangmengmeng/database/gatk_resource_bundle/hg38/somatic-hg38_af-only-gnomad.hg38.vcf.gz -pon /data02/zhangmengmeng/database/gatk_resource_bundle/hg38/somatic-hg38_1000g_pon.hg38.vcf.gz --f1r2-tar-gz FETB03.f1r2.tar.gz -O FETB03.raw.vcf && gatk LearnReadOrientationModel -I FETB03.f1r2.tar.gz -O FETB03.read-orientation-model.tar.gz && gatk GetPileupSummaries -I ../align/FETB03-BNPT-E_bqsr.bam -I ../align/FETB03-BNPT-M_bqsr.bam -I ../align/FETB03-FA-E_bqsr.bam -I ../align/FETB03-FA-M_bqsr.bam -L /data02/zhangmengmeng/database/gatk_resource_bundle/hg38/AgilentV6_GRCh38_ex_region.sort.filtered.bed -V /data02/zhangmengmeng/database/gatk_resource_bundle/hg38/somatic-hg38_af-only-gnomad.hg38.vcf.gz -O FETB03.pileups.table && gatk GetPileupSummaries -I ../align/FETB03-B_bqsr.bam -L /data02/zhangmengmeng/database/gatk_resource_bundle/hg38/AgilentV6_GRCh38_ex_region.sort.filtered.bed -V /data02/zhangmengmeng/database/gatk_resource_bundle/hg38/somatic-hg38_af-only-gnomad.hg38.vcf.gz -O FETB03-B.pileups.table && gatk CalculateContamination -I FETB03.pileups.table -matched FETB03-B.pileups.table -O FETB03.CalculateContamination.table && gatk FilterMutectCalls -R /data02/zhangmengmeng/database/hg38/gencode_GRCh38.p14.genome.fa -V FETB03.raw.vcf --contamination-table FETB03.CalculateContamination.table --ob-priors FETB03.read-orientation-model.tar.gz -O FETB03.somatic.vcf && gatk FilterAlignmentArtifacts -R /data02/zhangmengmeng/database/hg38/gencode_GRCh38.p14.genome.fa --bwa-mem-index-image /data02/zhangmengmeng/database/hg38/gencode_GRCh38.p14.genome.fa.img -V FETB03.somatic.vcf -O FETB03.somatic.FAA.vcf -I ../align/FETB03-BNPT-E_bqsr.bam -I ../align/FETB03-BNPT-M_bqsr.bam -I ../align/FETB03-FA-E_bqsr.bam -I ../align/FETB03-FA-M_bqsr.bam && awk '/^#/ {print $0; next} $7=="PASS"  {print $0}' FETB03.somatic.FAA.vcf > ./FETB03.somatic.passed.vcf && echo FETB03 somatic ok

2.使用 Annovar 对每个病人的多样本vcf进行注释

ls *somatic.passed.vcf | perl -ne 'chomp; my $name = $1 if ($_ =~ /([^\/]+)\.somatic\.passed\.vcf/); print "table_annovar.pl $name.somatic.passed.vcf /data02/zhangmengmeng/database/annovar_db/humandb_hg38 -buildver hg38 -out $name -remove -protocol refGene,cytoBand,avsnp151,cosmic70,exac03 -operation g,r,f,f,f -nastring . -vcfinput \n"'>annovar.sh

3.对注释后的vcf进行提取,获取基因名,突变点位,样本名,t_ref_count及t_alt_count

(echo -e "CHROM\tPOS\tREF\tALT\tGENE\t$(bcftools query -l FETB01.hg38_multianno.vcf | tr '\n' '\t')" && bcftools query -f '%CHROM\t%POS\t%REF\t%ALT\t%INFO/Gene.refGene[\t%AD]\n' FETB01.hg38_multianno.vcf) > ../pyclonevi/FETB01_mut.tsv
(echo -e "CHROM\tPOS\tREF\tALT\tGENE\t$(bcftools query -l FETB02.hg38_multianno.vcf | tr '\n' '\t')" && bcftools query -f '%CHROM\t%POS\t%REF\t%ALT\t%INFO/Gene.refGene[\t%AD]\n' FETB02.hg38_multianno.vcf) > ../pyclonevi/FETB02_mut.tsv
(echo -e "CHROM\tPOS\tREF\tALT\tGENE\t$(bcftools query -l FETB03.hg38_multianno.vcf | tr '\n' '\t')" && bcftools query -f '%CHROM\t%POS\t%REF\t%ALT\t%INFO/Gene.refGene[\t%AD]\n' FETB03.hg38_multianno.vcf) > ../pyclonevi/FETB03_mut.tsv

4.运行脚本转换为pyclone的输入格式

ls *_mut.tsv | perl -ne 'chomp;if($_=~/(\S+)\_mut\.tsv/){print "PreparePycloneVIInput -sampleid $1 -mut $1_mut.tsv -facets_cncf ../facets/$1-BNPT-E_cncf.xls,../facets/$1-BNPT-M_cncf.xls,../facets/$1-FA-E_cncf.xls,../facets/$1-FA-M_cncf.xls && echo $1 ok\n"}' > RunPycloneVIPrepare.sh

5.对准备好的输入文件按样本进行分割

./split_pyclone_input.py -tsv FETB01_pyclonevi_input.tsv,FETB02_pyclonevi_input.tsv,FETB03_pyclonevi_input.tsv

6.运行pyclone

ls *_pyclonevi_input.tsv | perl -ne 'chomp; if ($_ =~ /(\S+)_pyclonevi_input\.tsv/) { print "PyClone run_analysis_pipeline --in_files $1-BNPT-E_pyclone_input.tsv $1-FA-E_pyclone_input.tsv --working_dir $1-E && "; print "PyClone run_analysis_pipeline --in_files $1-BNPT-M_pyclone_input.tsv $1-FA-M_pyclone_input.tsv --working_dir $1-M && echo $1 ok\n"; }' > RunPyclone.sh

split_pyclone_input.py

#!/usr/bin/env python
import pandas as pd
import argparse

def split_tsv(input_file):
    # Read the TSV file
    df = pd.read_csv(input_file, sep='\t')

    # Filter: Keep only rows where 'major_cn' > 0
    df = df[df['major_cn'] > 0]

    # Rename 'alt_counts' column to 'var_counts'
    df.rename(columns={'alt_counts': 'var_counts'}, inplace=True)

    # Group by 'sample_id' and create separate files
    for sample, group in df.groupby('sample_id'):
        # Drop 'sample_id' column
        group = group.drop(columns=['sample_id'])

        # Generate the output filename based on the sample_id
        output_filename = f"{sample}_pyclone_input.tsv"

        # Write the group to a TSV file
        group.to_csv(output_filename, sep='\t', index=False)
        print(f"Saved: {output_filename}")

def main():
    parser = argparse.ArgumentParser(description="Split TSV files by sample_id and filter rows where major_cn > 0")
    parser.add_argument('-tsv', type=str, help='Comma-separated list of TSV files', required=True)

    # Parse the command line arguments
    args = parser.parse_args()
    tsv_files = args.tsv.split(',')

    # Process each TSV file
    for file in tsv_files:
        split_tsv(file)

if __name__ == "__main__":
    main()
2025 年 3 月
一 二 三 四 五 六 日
 12
3456789
10111213141516
17181920212223
24252627282930
31  
« 2 月   4 月 »

俺家的猫~

胖达~

© 2026 KevinZ的小窝 |

粤ICP备2023017690号

|

粤公网安备 44010402003004号