{"id":63,"date":"2023-03-07T14:51:44","date_gmt":"2023-03-07T06:51:44","guid":{"rendered":"https:\/\/www.kz-hub.tech\/?p=63"},"modified":"2024-07-12T20:48:52","modified_gmt":"2024-07-12T12:48:52","slug":"%e7%94%9f%e4%bf%a1%e5%88%86%e6%9e%90%e6%b5%81%e7%a8%8b%e6%b1%87%e6%80%bb","status":"publish","type":"post","link":"https:\/\/www.kz-hub.tech\/index.php\/2023\/03\/07\/%e7%94%9f%e4%bf%a1%e5%88%86%e6%9e%90%e6%b5%81%e7%a8%8b%e6%b1%87%e6%80%bb\/","title":{"rendered":"\u751f\u4fe1\u5206\u6790\u6d41\u7a0b\u6c47\u603b\uff08\u81ea\u7528\u672a\u6574\u7406\uff09"},"content":{"rendered":"<h1>\u5185\u5bb9\u94fe\u63a5<\/h1>\n<p><a href=\"#scrna-seq-\u5355\u7ec6\u80de\u5206\u6790\u6d41\u7a0b\">scRNA-seq \u5355\u7ec6\u80de\u5206\u6790\u6d41\u7a0b<\/a><\/p>\n<p><a href=\"#exome-seq-\u5916\u663e\u5b50\u6d4b\u5e8f\u5206\u6790\u6d41\u7a0b\">Exome-seq \u5916\u663e\u5b50\u6d4b\u5e8f\u5206\u6790\u6d41\u7a0b<\/a><\/p>\n<p><a href=\"#rna-seq-\u8f6c\u5f55\u7ec4\u6d4b\u5e8f\u5206\u6790\u6d41\u7a0b\">RNA-seq \u8f6c\u5f55\u7ec4\u6d4b\u5e8f\u5206\u6790\u6d41\u7a0b<\/a><\/p>\n<h1>\u6784\u5efa SBATCH shell \u811a\u672c\u7684\u5f00\u5934<\/h1>\n<pre><code class=\"language-bash\">#!\/bin\/bash\n#SBATCH -J\n#SBATCH -w node7\n#SBATCH --ntasks-per-node=16<\/code><\/pre>\n<h1>Environment Settings \u73af\u5883\u8bbe\u7f6e<\/h1>\n<pre><code class=\"language-bash\"> export PATH=\/home\/software\/R-4.0.3\/bin:$PATH\n export LD_LIBRARY_PATH=\/home\/software\/R-4.0.3\/lib64:$LD_LIBRARY_PATH<\/code><\/pre>\n<h1>scRNA-seq \u5355\u7ec6\u80de\u5206\u6790\u6d41\u7a0b<\/h1>\n<h2>1. \u6fc0\u6d3bconda\u73af\u5883(\u53ef\u4ee5\u4e0d\u7528 conda \u8dd1)<\/h2>\n<pre><code class=\"language-bash\">conda activate cellranger<\/code><\/pre>\n<h2>2. \u6570\u636e\u89e3\u538b\uff08bam2fastq\uff09<\/h2>\n<h3>perl \u811a\u672c\u521b\u5efash\u6587\u4ef6\u7528\u4e8e\u540e\u7eed\u8fd0\u884c<\/h3>\n<pre><code class=\"language-bash\">perl -ne &#039;chomp;next if($_=~\/^$\/);my @a=split \/\\.\/;print &quot;bamtofastq $a[0]\\.bam $a[0] &amp;&amp; echo $a[0] ok\\n&quot;&#039; sample_pair.txt &gt;bamtofastq.sh\nnohup .\/bamtofastq.sh &amp;<\/code><\/pre>\n<h2>3. cellranger count\uff08\u751f\u6210\u6587\u4ef6\u5939\u5305\u62ecouts\u6587\u4ef6\u5939\uff0c\u5185\u542b\u7528\u4e8eR\u7684Seurat\u7684features, barcode, matrix\uff09<\/h2>\n<pre><code class=\"language-bash\">perl -ne &#039;chomp;next if($_=~\/^$\/);my @a=split \/\\.\/;print &quot;cellranger count --id=$a[0] --transcriptome=\/home\/zhoukaiwen\/database\/cellranger\/refdata-gex-GRCh38-2020-A --fastqs=\/home\/zhoukaiwen\/IBC\/scRNA\/rawdata\/$a[0] --sample=$a[0] &amp;&amp; echo $a[0] ok\\n&quot;&#039; sample_pair.txt &gt;cellranger_count.sh<\/code><\/pre>\n<h2>4. cellranger vdj\uff08TCR\u3001BCR-seq\uff0c\u751f\u6210\u8868\u683c\uff0c\u7528\u4e8e\u5728Seurat\u4e2d\u5bfc\u5165\u5230metadata\uff09<\/h2>\n<pre><code class=\"language-bash\">perl -ne &#039;chomp;next if($_=~\/^$\/);my @a=split \/\\.\/;print &quot;cellranger vdj --id=$a[0] --reference=\/home\/zhoukaiwen\/database\/cellranger\/refdata-cellranger-vdj-GRCh38-alts-ensembl-5.0.0 --fastqs=\/home\/zhoukaiwen\/IBC\/scTCR\/rawdata\/$a[0] --sample=$a[0] --localcores=8 --localmem=64 &amp;&amp; echo $a[0] ok&quot;&#039; sample_pair.txt &gt;cellranger_vdj.sh\n<\/code><\/pre>\n<h2>5\u3001cellranger aggr \u5c06\u6240\u6709\u6837\u672c\u5408\u6210\u4e00\u4efd\u4ee5\u6392\u9664\u591a\u79cd\u514b\u9686<\/h2>\n<h3>5.1 \u521b\u5efa aggr.csv \u6587\u4ef6, \u683c\u5f0f\u5982\u4e0b<\/h3>\n<pre><code class=\"language-bash\">sample_id,vdj_contig_info,donor,origin\nIBC01-S,\/home\/zhoukaiwen\/IBC\/scTCR\/cellranger_vdj\/IBC01-S\/outs\/vdj_contig_info.pb,IBC01,01Skin\nIBC01-T,\/home\/zhoukaiwen\/IBC\/scTCR\/cellranger_vdj\/IBC01-T\/outs\/vdj_contig_info.pb,IBC01,01Tumor\nIBC02-S,\/home\/zhoukaiwen\/IBC\/scTCR\/cellranger_vdj\/IBC02-S\/outs\/vdj_contig_info.pb,IBC02,02Skin\nIBC02-T,\/home\/zhoukaiwen\/IBC\/scTCR\/cellranger_vdj\/IBC02-T\/outs\/vdj_contig_info.pb,IBC02,02Tumor\nIBC03-S,\/home\/zhoukaiwen\/IBC\/scTCR\/cellranger_vdj\/IBC03-S\/outs\/vdj_contig_info.pb,IBC03,03Skin\nIBC03-T,\/home\/zhoukaiwen\/IBC\/scTCR\/cellranger_vdj\/IBC03-T\/outs\/vdj_contig_info.pb,IBC03,03Tumor\nIBC04-S,\/home\/zhoukaiwen\/IBC\/scTCR\/cellranger_vdj\/IBC04-S\/outs\/vdj_contig_info.pb,IBC04,04Skin\nIBC04-T,\/home\/zhoukaiwen\/IBC\/scTCR\/cellranger_vdj\/IBC04-T\/outs\/vdj_contig_info.pb,IBC04,04Tumor\nIBC05-S,\/home\/zhoukaiwen\/IBC\/scTCR\/cellranger_vdj\/IBC05-S\/outs\/vdj_contig_info.pb,IBC05,05Skin\nIBC05-T,\/home\/zhoukaiwen\/IBC\/scTCR\/cellranger_vdj\/IBC05-T\/outs\/vdj_contig_info.pb,IBC05,05Tumor<\/code><\/pre>\n<h3>5.2 \u8fd0\u884c cellranger aggr<\/h3>\n<pre><code class=\"language-bash\">cellranger aggr --id=IBC1234TS_TCR --csv=aggr.csv<\/code><\/pre>\n<p>\u4e09\u4e2a\u53ef\u7528\u4e8eSeurat\u7684\u7ed3\u679c\u6587\u4ef6\u5728 IBC1234TS_TCR\/outs\/vdj_t \u91cc<\/p>\n<h1>Exome-seq \u5916\u663e\u5b50\u6d4b\u5e8f\u5206\u6790\u6d41\u7a0b<\/h1>\n<h2>1. \u5efa\u7acb\u6587\u4ef6\u5939<\/h2>\n<pre><code class=\"language-bash\">mkdir {rawdata,align,somatic,facets,msi,polysolver,lohhla,netmhcpan,sequenza}<\/code><\/pre>\n<h2>2. \u6570\u636e\u89e3\u538b<\/h2>\n<pre><code class=\"language-bash\">cd rawdata\n#!\/bin\/bash\nfor file in `ls \/Bulk\/Long\/GSE_data\/GSE50760\/*.1`;do\nfasterq-dump --split-3 $file -O \/Bulk\/metastasis\/GSE50760\/rawdata\ndone<\/code><\/pre>\n<h2>3. \u6570\u636e\u8d28\u63a7<\/h2>\n<h3>fastqc+multiqc\u67e5\u770b\u6d4b\u5e8f\u6570\u636e\u8d28\u91cf<\/h3>\n<pre><code class=\"language-bash\">cd \/Bulk\/metastasis\/SRP034161\/rawdata\nfastqc *fastq.gz\nmultiqc *.zip<\/code><\/pre>\n<h3>trimmomatic\u5355\u7aef\u6d4b\u5e8f\u53bb\u63a5\u5934<\/h3>\n<pre><code class=\"language-bash\">ls ..\/rawdata\/*.fq.gz|perl -ne &#039;chomp;my $name=$1 if($_=~\/\\\/([^\\\/]+)\\.fq.gz\/);print &quot;trimmomatic SE -phred33 -threads 4 $name\\.fq.gz $name\\.trimmed.fq.gz ILLUMINACLIP:TruSeq3-SE:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36\\n&quot;&#039; &gt;trimmomatic.sh<\/code><\/pre>\n<h3>trimmomatic\u53cc\u7aef\u6d4b\u5e8f\u53bb\u63a5\u5934<\/h3>\n<pre><code class=\"language-bash\">ls ..\/rawdata\/*_R1.fastq.gz|perl -ne &#039;chomp;my $name=$1 if($_=~\/\\\/([^\\\/]+)\\_R1\/);print &quot;trimmomatic PE -phred33 $name\\_R1.fq.gz $name\\_R2.fq.gz $name\\_R1_paired.fq.gz $name\\_R1_unpaired.fq.gz $name\\_R2_paired.fq.gz $name\\_R2_unpaired.fq.gz ILLUMINACLIP:TruSeq3-PE.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36 \\n&quot;&#039; &gt;trimmomatic.sh<\/code><\/pre>\n<h2>4. \u5e8f\u5217\u6bd4\u5bf9 &amp; \u6807\u8bb0PCR\u91cd\u590d<\/h2>\n<h3>\u8054\u5ddd\u6d4b\u5e8f<\/h3>\n<pre><code class=\"language-bash\">cd ..\/align\nls ..\/rawdata\/*_R1.fastq.gz|perl -ne &#039;chomp;my $name=$1 if($_=~\/\\\/([^\\\/]+)\\_R1\/);print &quot;\/home\/zhoukaiwen\/software\/bwa-0.7.17\/bwa mem -t 10 -R \\&quot;\\@RG\\\\tID:$name\\\\tSM:$name\\\\tLB:WES\\\\tPL:Illumina\\&quot; \/home\/zhoukaiwen\/database\/GRCh37_hg19\/hg19.fa ..\/rawdata\/$name\\_R1.fastq.gz ..\/rawdata\/$name\\_R2.fastq.gz | samtools view -bS - &gt;$name.bam &amp;&amp; samtools sort -@ 5 -o $name.sort.bam $name.bam &amp;&amp; \/home\/zhoukaiwen\/software\/gatk-4.1.2.0\/gatk --java-options \\&quot;-Xmx20G -Djava.io.tmpdir=.\/\\&quot; MarkDuplicates --INPUT $name.sort.bam --OUTPUT $name.sort.markdup.bam -M $name.metrics  --VALIDATION_STRINGENCY=LENIENT &amp;&amp; samtools index $name.sort.markdup.bam &amp;&amp;echo $name ok\\n&quot;&#039; &gt;aln.sh\n\nls ..\/rawdata\/*_val_1.fq.gz|perl -ne &#039;chomp;my $name=$1 if($_=~\/\\\/([^\\\/]+)\\_val\\_1\/);print &quot;\/home\/zhoukaiwen\/software\/bwa-0.7.17\/bwa mem -t 10 -R \\&quot;\\@RG\\\\tID:$name\\\\tSM:$name\\\\tLB:WES\\\\tPL:Illumina\\&quot; \/home\/zhoukaiwen\/database\/GRCh37_hg19\/hg19.fa ..\/rawdata\/$name\\_val\\_1.fq.gz ..\/rawdata\/$name\\_val\\_2.fq.gz | samtools view -bS - &gt;$name.bam &amp;&amp; samtools sort -@ 5 -o $name.sort.bam $name.bam &amp;&amp; \/home\/zhoukaiwen\/software\/gatk-4.1.2.0\/gatk --java-options \\&quot;-Xmx20G -Djava.io.tmpdir=.\/\\&quot; MarkDuplicates --INPUT $name.sort.bam --OUTPUT $name.sort.markdup.bam -M $name.metrics  --VALIDATION_STRINGENCY=LENIENT &amp;&amp; samtools index $name.sort.markdup.bam &amp;&amp;echo $name ok\\n&quot;&#039; &gt;aln_val.sh\n\nchmod u+x aln.sh\nnohup .\/aln.sh&gt;aln.log 2&gt;&amp;1 &amp;<\/code><\/pre>\n<h3>\u660e\u7801\u751f\u7269\u6d4b\u5e8f<\/h3>\n<pre><code class=\"language-bash\">ls ..\/rawdata\/*_val_1.fq.gz|perl -ne &#039;chomp;my $name=$1 if($_=~\/\\\/([^\\\/]+)\\_val\/);print &quot;\/home\/zhoukaiwen\/software\/bwa-0.7.17\/bwa mem -t 10 -R \\&quot;\\@RG\\\\tID:$name\\\\tSM:$name\\\\tLB:WES\\\\tPL:Illumina\\&quot; \/home\/zhoukaiwen\/database\/GRCh37_hg19\/hg19.fa ..\/rawdata\/$name\\_val\\_1.fq.gz ..\/rawdata\/$name\\_val\\_1.fq.gz | samtools view -bS - &gt;$name.bam &amp;&amp; samtools sort -@ 5 -o $name.sort.bam $name.bam &amp;&amp; \/home\/zhoukaiwen\/software\/gatk-4.1.2.0\/gatk --java-options \\&quot;-Xmx20G -Djava.io.tmpdir=.\/\\&quot; MarkDuplicates --INPUT $name.sort.bam --OUTPUT $name.sort.markdup.bam -M $name.metrics  --VALIDATION_STRINGENCY=LENIENT &amp;&amp; samtools index $name.sort.markdup.bam &amp;&amp;echo $name ok\\n&quot;&#039; &gt;aln.sh\nchmod u+x aln.sh\nnohup .\/aln.sh&gt;aln.log 2&gt;&amp;1 &amp;<\/code><\/pre>\n<h2>5. \u67e5\u770b\u6bd4\u5bf9\u7ed3\u679c<\/h2>\n<pre><code class=\"language-bash\">cd ..\/align\nls *.sort.markdup.bam|while read id;do samtools flagstat $id;done<\/code><\/pre>\n<h2>6. \u78b1\u57fa\u77eb\u6b63 BaseRecalibrator + ApplyBQSR<\/h2>\n<h3>\u78b1\u57fa\u77eb\u6b63\uff08\u5bf9base\u7684quality score\u8fdb\u884c\u6821\u6b63\uff09\u78b1\u57fa\u8d28\u91cf\u5206\u6570\u91cd\u6821\u51c6\uff08Base quality score recalibration\uff0cBQSR)\uff0c\u5c31\u662f\u5229\u7528\u673a\u5668\u5b66\u4e60\u7684\u65b9\u5f0f\u8c03\u6574\u539f\u59cb\u78b1\u57fa\u7684\u8d28\u91cf\u5206\u6570\u3002\u5b83\u5206\u4e3a\u4e24\u4e2a\u6b65\u9aa4:1\u3001\u5229\u7528\u5df2\u6709\u7684snp\u6570\u636e\u5e93\uff0c\u5efa\u7acb\u76f8\u5173\u6027\u6a21\u578b\uff0c\u4ea7\u751f\u91cd\u6821\u51c6\u8868( recalibration table)\u30022\u3001\u6839\u636e\u8fd9\u4e2a\u6a21\u578b\u5bf9\u539f\u59cb\u78b1\u57fa\u8fdb\u884c\u8c03\u6574\uff0c\u53ea\u4f1a\u8c03\u6574\u975e\u5df2\u77e5SNP\u533a\u57df<\/h3>\n<pre><code class=\"language-bash\">cd ..\/align\n\ngunzip dbsnp_138.hg19.vcf.gz\nbgzip dbsnp_138.hg19.vcf\ntabix -p vcf dbsnp_138.hg19.vcf.gz\n\ngunzip Mills_and_1000G_gold_standard.indels.hg19.sites.vcf.gz\nbgzip Mills_and_1000G_gold_standard.indels.hg19.sites.vcf\ntabix -p vcf Mills_and_1000G_gold_standard.indels.hg19.sites.vcf.gz\n\nref=\/home\/zhoukaiwen\/database\/GRCh37_hg19\/hg19.fa\nsnp=\/home\/zhoukaiwen\/database\/gatk_bundle\/hg19\/dbsnp_138.hg19.vcf.gz\nindel=\/home\/zhoukaiwen\/database\/gatk_bundle\/hg19\/Mills_and_1000G_gold_standard.indels.hg19.sites.vcf.gz\n\nls *.sort.markdup.bam|while read sample\ndo nohup \/home\/zhoukaiwen\/software\/gatk-4.1.2.0\/gatk \\\n         --java-options &quot;-Xmx20G -Djava.io.tmpdir=.\/&quot; \\\n         BaseRecalibrator \\\n         -R $ref \\\n         -I $sample \\\n         --known-sites $snp \\\n         --known-sites $indel \\\n         -O ${sample%%.*}_recal.table \\\n         1&gt;${sample%%.*}_log.recal 2&gt;&amp;1 &amp;\ndone\n\n#BWA\u6bd4\u5bf9\u65f6\u8bbe\u7f6e@RG\u7684\u91cd\u8981\u6027\u5728\u8fd9\u4e00\u6b65\u4f53\u73b0\u51fa\u6765\u4e86\uff0c\u7b97\u6cd5\u901a\u8fc7@RG\u4e2d\u7684ID\u6765\u8bc6\u522b\u5404\u4e2a\u72ec\u7acb\u7684\u6d4b\u5e8f\u8fc7\u7a0b\nls *.sort.markdup.bam|while read sample\ndo nohup \/home\/zhoukaiwen\/software\/gatk-4.1.2.0\/gatk \\\n         --java-options &quot;-Xmx20G -Djava.io.tmpdir=.\/&quot; ApplyBQSR \\\n         -R $ref  \\\n         -I $sample  \\\n         -bqsr ${sample%%.*}_recal.table \\\n         -O ${sample%%.*}_bqsr.bam \\\n         1&gt;${sample%%.*}_log.ApplyBQSR  2&gt;&amp;1 &amp;\ndone<\/code><\/pre>\n<h3>\u4f7f\u7528 perl \u811a\u672c\u6784\u5efa shell \u811a\u672c\uff0c\u6309\u6bcf\u4e2a\u6837\u672c\u8f93\u51fa\u4e00\u4efd\u4ee3\u7801\u8fd0\u884c BaseRecalibrator &amp; ApplyBQSR<\/h3>\n<pre><code class=\"language-bash\"># \u81ea\u5df1\u7684\u6570\u636e\u7528hg19.fa\nls *.sort.markdup.bam|perl -ne &#039;chomp;if($_=~\/(\\S+)\\.sort\/){print &quot;\/home\/zhoukaiwen\/software\/gatk-4.1.2.0\/gatk --java-options \\&quot;-Xmx20G -Djava.io.tmpdir=.\/\\&quot; BaseRecalibrator -R \/home\/zhoukaiwen\/database\/GRCh37_hg19\/hg19.fa -I $1.sort.markdup.bam --known-sites \/home\/zhoukaiwen\/database\/gatk_bundle\/hg19\/dbsnp_138.hg19.vcf.gz --known-sites \/home\/zhoukaiwen\/database\/gatk_bundle\/hg19\/Mills_and_1000G_gold_standard.indels.hg19.sites.vcf.gz --O $1_recal.table &amp;&amp; \/home\/zhoukaiwen\/software\/gatk-4.1.2.0\/gatk --java-options \\&quot;-Xmx20G -Djava.io.tmpdir=.\/\\&quot; ApplyBQSR -R \/home\/zhoukaiwen\/database\/GRCh37_hg19\/hg19.fa -I $1.sort.markdup.bam -bqsr $1_recal.table -O $1_bqsr.bam &amp;&amp; echo $1 ok\\n&quot;}&#039; &gt;BQSR.sh\n\n# TCGA-BRCA\u6570\u636e\u7528GRCh37-lite.fa\nls *.sort.markdup.bam|perl -ne &#039;chomp;if($_=~\/(\\S+)\\.sort\/){print &quot;\/home\/zhoukaiwen\/software\/gatk-4.1.2.0\/gatk --java-options \\&quot;-Xmx20G -Djava.io.tmpdir=.\/\\&quot; BaseRecalibrator -R \/home\/zhoukaiwen\/database\/GRCh37_hg19\/GRCh37-lite.fa -I $1.sort.markdup.bam --known-sites \/home\/zhoukaiwen\/database\/gatk_bundle\/hg19_without_chr\/dbsnp_138.hg19.vcf --known-sites \/home\/zhoukaiwen\/database\/gatk_bundle\/hg19_without_chr\/Mills_and_1000G_gold_standard.indels.hg19.sites.vcf --known-sites  \/home\/zhoukaiwen\/database\/gatk_bundle\/hg19_without_chr\/1000G_phase1.indels.hg19.sites.vcf --O $1_recal.table &amp;&amp; \/home\/zhoukaiwen\/software\/gatk-4.1.2.0\/gatk --java-options \\&quot;-Xmx20G -Djava.io.tmpdir=.\/\\&quot; ApplyBQSR -R \/home\/zhoukaiwen\/database\/GRCh37_hg19\/GRCh37-lite.fa -I $1.sort.markdup.bam -bqsr $1_recal.table -O $1_bqsr.bam &amp;&amp; echo $1 ok\\n&quot;}&#039; &gt;BQSR.sh\n\nnohup .\/BQSR.sh &gt;bqsr.log 2&gt;&amp;1 &amp;\n<\/code><\/pre>\n<h3>\u67e5\u770b\u6bd4\u5bf9\u7ed3\u679c<\/h3>\n<pre><code class=\"language-bash\">ls *bqsr.bam|while read id\ndo nohup samtools flagstat $id &amp;\ndone<\/code><\/pre>\n<pre><code class=\"language-bash\">\/home\/zhoukaiwen\/software\/gatk-4.1.2.0\/gatk --java-options &quot;-Xmx20G&quot; AnalyzeCovariates \\\n     -before recal_data.table1 \\\n     -after recal_data.table2 \\\n     -plots AnalyzeCovariates.pdf\n\nls *_bqsr.bam|while read sample\ndo nohup samtools index $sample &gt;$sample_bqsr_index.log 2&gt;&amp;1 &amp;\ndone<\/code><\/pre>\n<h2>7. Mutect2 \u5bfb\u627esomatic mutation<\/h2>\n<h3>\u4e0b\u8f7d\u5df2\u77e5\u7684germline mutation\u7528\u4e8e\u8fc7\u6ee4<\/h3>\n<pre><code class=\"language-bash\">gunzip -c af-only-gnomad.raw.sites.b37.vcf.gz &gt;af-only-gnomad.raw.sites.b37.vcf\ncat af-only-gnomad.raw.sites.b37.vcf|grep -v &quot;#&quot;|sed &#039;s\/^\/chr&amp;\/g&#039;&gt;af-only-gnomad.raw.sites.b37.tmp.vcf &amp;&amp; cat af-only-gnomad.raw.sites.b37.vcf|grep &quot;#&quot;&gt;header.vcf &amp;&amp; cat header.vcf af-only-gnomad.raw.sites.b37.tmp.vcf &gt;af-only-gnomad.raw.sites.b37.chr.vcf\nbgzip af-only-gnomad.raw.sites.b37.chr.vcf af-only-gnomad.raw.sites.b37.chr.vcf.gz\ntabix -p vcf af-only-gnomad.raw.sites.b37.chr.vcf.gz\nnohup java -Xmx4G -jar \/home\/zhoukaiwen\/software\/gatk-4.1.2.0\/gatk-package-4.1.2.0-local.jar SelectVariants -R \/home\/zhoukaiwen\/database\/GRCh37_hg19\/hg19.fa -V \/home\/zhoukaiwen\/database\/gatk_bundle\/hg19\/af-only-gnomad.raw.sites.b37.chr.vcf.gz --select-type-to-include SNP --restrict-alleles-to BIALLELIC -O \/home\/zhoukaiwen\/database\/gatk_bundle\/hg19\/af-only-gnomad.raw.sites.b37.chr.SNP_biallelic.vcf.gz &amp;<\/code><\/pre>\n<h3>\u4f7f\u7528perl\u811a\u672c\u521b\u5efashell\u811a\u672c\u7528\u4e8e\u8fd0\u884cmutect2<\/h3>\n<pre><code class=\"language-bash\">cd ..\/somatic\nref=\/home\/zhoukaiwen\/database\/GRCh37_hg19\/hg19.fa\ngermine_vcf=\/home\/zhoukaiwen\/database\/gatk_bundle\/hg19\/af-only-gnomad.raw.sites.b37.chr.vcf.gz\ngermine_biallelic_vcf=\/home\/zhoukaiwen\/database\/gatk_bundle\/hg19\/af-only-gnomad.raw.sites.b37.chr.SNP_biallelic.vcf.gz\ngenomic_intervals=\/home\/zhoukaiwen\/database\/database\/Agilent_V6.interval_list\n\ncreate.sh:\nperl -ne &#039;chomp;next if($_=~\/^$\/);my @a=split \/\\t\/;print &quot;java -Xmx4G -jar \/home\/zhoukaiwen\/software\/gatk-4.1.2.0\/gatk-package-4.1.2.0-local.jar Mutect2 -R \/home\/zhoukaiwen\/database\/GRCh37_hg19\/hg19.fa -I ..\/align\/$a[0]_bqsr.bam -I ..\/align\/$a[1]_bqsr.bam -normal $a[0] -tumor $a[1] -L \/home\/zhoukaiwen\/database\/database\/Agilent_V6.interval_list --germline-resource \/home\/zhoukaiwen\/database\/gatk_bundle\/hg19\/af-only-gnomad.raw.sites.b37.chr.vcf.gz -O $a[1].raw.vcf &amp;&amp; java -Xmx4G -jar \/home\/zhoukaiwen\/software\/gatk-4.1.2.0\/gatk-package-4.1.2.0-local.jar GetPileupSummaries -I ..\/align\/$a[0]_bqsr.bam -L \/home\/zhoukaiwen\/database\/database\/Agilent_V6.interval_list -V \/home\/zhoukaiwen\/database\/gatk_bundle\/hg19\/af-only-gnomad.raw.sites.b37.chr.SNP_biallelic.vcf.gz -O $a[0].pileups.table &amp;&amp; java -Xmx4G -jar \/home\/zhoukaiwen\/software\/gatk-4.1.2.0\/gatk-package-4.1.2.0-local.jar GetPileupSummaries -I ..\/align\/$a[1]_bqsr.bam -L \/home\/zhoukaiwen\/database\/database\/Agilent_V6.interval_list -V \/home\/zhoukaiwen\/database\/gatk_bundle\/hg19\/af-only-gnomad.raw.sites.b37.chr.SNP_biallelic.vcf.gz -O $a[1].pileups.table &amp;&amp; java -Xmx4G -jar \/home\/zhoukaiwen\/software\/gatk-4.1.2.0\/gatk-package-4.1.2.0-local.jar CalculateContamination -I $a[1].pileups.table -matched $a[0].pileups.table -O $a[1].CalculateContamination.table &amp;&amp; java -Xmx4G -jar \/home\/zhoukaiwen\/software\/gatk-4.1.2.0\/gatk-package-4.1.2.0-local.jar FilterMutectCalls -R \/home\/zhoukaiwen\/database\/GRCh37_hg19\/hg19.fa -V $a[1].raw.vcf --contamination-table $a[1].CalculateContamination.table -O $a[1].somatic.vcf &amp;&amp; perl \/home\/zhoukaiwen\/software\/annovar\/convert2annovar.pl -format vcf4old -filter pass $a[1].somatic.vcf &gt;$a[1].somatic.annovar &amp;&amp; perl \/home\/zhoukaiwen\/software\/annovar\/table_annovar.pl $a[1].somatic.annovar \/home\/zhoukaiwen\/software\/annovar\/database\/humandb_hg19 --protocol refGene,gnomad_exome,avsnp150,icgc21,cosmic70,clinvar_20190305,dbnsfp35c,dbscsnv11,intervar_20180118 --operation g,f,f,f,f,f,f,f,f --buildver hg19 --remove --nastring - --thread 4 --otherinfo &amp;&amp; echo $a[1] somatic ok\\n&quot;&#039; sample_pair.txt &gt;mutect2.sh\n# sample_pair.txt \u4e3a tab \u5206\u9694\u7684 txt \u6587\u4ef6\uff0c\u7b2c\u4e00\u5217\u4e3a normal\uff0c\u7b2c\u4e8c\u5217\u4e3a tumor sample id\n\nnohup .\/mutect2.sh &gt; mutect2.log 2&gt;&amp;1&amp;<\/code><\/pre>\n<h1>8. \u5c06 vcf \u8f6c\u4e3a maf \u540e\u8fc7\u6ee4\uff0c\u7b5b\u9009 filter \u4e3a PASS<\/h1>\n<pre><code class=\"language-bash\">conda activate vep\nexport PERL5LIB=&quot;\/home\/zhoukaiwen\/software\/anaconda3\/envs\/vep\/lib\/site_perl\/5.26.2\/x86_64-linux-thread-multi&quot;\n\nperl -ne &#039;chomp;next if($_=~\/^$\/);my @a=split \/\\t\/;print &quot;vcf2maf.pl --input-vcf $a[1].somatic.vcf --output-maf $a[1].maf --normal-id $a[0] --tumor-id $a[1] --vcf-tumor-id $a[1] --vcf-normal-id $a[0] --vep-path \/home\/zhoukaiwen\/software\/anaconda3\/envs\/vep\/bin --vep-data \/home\/zhoukaiwen\/database\/vep --ref-fasta \/home\/zhoukaiwen\/database\/GRCh37_hg19\/hg19.fa &amp;&amp; cat $a[1].maf|grep -E \\&quot;version|FILTER|PASS\\&quot; &gt; $a[1]_filtered.maf &amp;&amp; echo $a[1] vcf2maf ok\\n&quot;&#039; sample_pair.txt &gt;vcf2maf.sh\n\nnohup .\/vcf2maf.sh &gt; vcf2maf.log 2&gt;&amp;1 &amp;<\/code><\/pre>\n<h2>9. \u5c06\u6240\u6709 maf \u5408\u5e76\u4e3a\u4e00\u4e2a\u6587\u4ef6\uff0c\u5e76\u7528maf \u8ba1\u7b97 VAF \uff08python\uff09<\/h2>\n<pre><code class=\"language-python\">import os\n\nall_maf = open(&quot;\/IBC\/WES\/somatic\/all.maf&quot;, &quot;a&quot;)\nVAF_maf = open(&quot;\/IBC\/WES\/somatic\/VAF.maf&quot;, &quot;a&quot;)\npath = &quot;\/IBC\/WES\/somatic\/&quot;\nfile_list = os.listdir(path)\nmaf_list = []\n\nfor file in os.listdir(path):\n    if file.endswith(&quot;filtered.maf&quot;):\n        maf_list.append(file)\n\nmaf_list.sort()\nprint(maf_list)\nn = 0\nx = 0\n\nVAF_maf.write(&quot;Hugo_Symbol Chromosome Start_Position End_Position Variant_Classification Variant_Type Reference_Allele Tumor_Seq_Allele2 Ref_allele_depth Alt_allele_depth Tumor_Sample_Barcode VAF&quot; + &#039;\\n&#039;)\n\nfor file in maf_list:\n    if file == &quot;all.maf&quot;:\n        pass\n    elif file == &quot;VAF.maf&quot;:\n        pass\n    else:\n        f = open(path + &quot;\/&quot; + file, &quot;r&quot;)\n        for lines in f:\n            cutlines = lines.strip().split(&#039;\\t&#039;)\n            if &quot;#version&quot; in lines:\n                header1 = lines\n                if n == 0:\n                    all_maf.write(lines)\n                    n += 1\n            elif &quot;Hugo_Symbol&quot; in lines:\n                header2 = lines\n                if x == 0:\n                    all_maf.write(lines)\n                    x += 1\n            else:\n                all_maf.write(lines)\n                VAF = int(cutlines[42]) \/ (int(cutlines[41]) + int(cutlines[42]))\n                # $1,$5,$6,$7,$9,$10,$11,$13,$41,$42,$16\n\n                VAF_maf.write(cutlines[0] + &#039;\\t&#039; +\n                              cutlines[4] + &#039;\\t&#039; +\n                              cutlines[5] + &#039;\\t&#039; +\n                              cutlines[6] + &#039;\\t&#039; +\n                              cutlines[8] + &#039;\\t&#039; +\n                              cutlines[9] + &#039;\\t&#039; +\n                              cutlines[10] + &#039;\\t&#039; +\n                              cutlines[12] + &#039;\\t&#039; +\n                              cutlines[40] + &#039;\\t&#039; +\n                              cutlines[41] + &#039;\\t&#039; +\n                              cutlines[15] + &#039;\\t&#039; +\n                              str(VAF) + &#039;\\n&#039;)<\/code><\/pre>\n<h2>10. \u8ba1\u7b97\u5fae\u536b\u661f\u4e0d\u7a33\u5b9a<\/h2>\n<p><a href=\"https:\/\/github.com\/xjtu-omics\/msisensor-pro\/wiki\/Best-Practices\">msisensor-pro github \u94fe\u63a5<\/a><\/p>\n<pre><code class=\"language-bash\">cd ..\/msi\nperl -ne &#039;chomp;next if($_=~\/^$\/);my @a=split \/\\t\/;print &quot;\/home\/zhoukaiwen\/software\/msisensor\/binary\/msisensor_Linux_x86_64 msi -d \/home\/zhoukaiwen\/database\/database\/microsatellites.list -n ..\/align\/$a[0]_bqsr.bam -t ..\/align\/$a[1]_bqsr.bam -e \/home\/zhoukaiwen\/database\/database\/Agilent_V6.bed -o $a[1].msi &amp;&amp; echo $a[1] ok\\n&quot;&#039; ..\/somatic\/sample_pair.txt &gt;msi.sh\nnohup .\/msi.sh &gt; msi.log 2&gt;&amp;1 &amp;<\/code><\/pre>\n<h2>11. Facets \u8ba1\u7b97CNV<\/h2>\n<pre><code class=\"language-bash\"># Using R4.0.3 in ~\/.bashrc \ncd ..\/facets\nperl -ne &#039;chomp;next if($_=~\/^$\/);my @a=split \/\\t\/;print &quot;perl \/home\/zhoukaiwen\/software\/FACETS\/pileup\/snp-pileup.pl ..\/align\/$a[0]_bqsr.bam ..\/align\/$a[1]_bqsr.bam $a[1].txt &amp;&amp; cp \/home\/zhoukaiwen\/software\/FACETS\/demo.R $a[1].R &amp;&amp; sed -i \\&quot;s\/example\/$a[1]\/\\&quot; $a[1].R &amp;&amp; Rscript $a[1].R &amp;&amp; echo $a[1] facets ok\\n&quot;&#039; ..\/somatic\/sample_pair.txt &gt;facets.sh\nnohup .\/facets.sh &gt; facets.log 2&gt;&amp;1 &amp;\n\n# II\u671f\u670d\u52a1\u5668\u4e0a\u8dd1\nconda activate facets\nperl -ne &#039;chomp;next if($_=~\/^$\/);my @a=split \/\\t\/;print &quot;snp-pileup -A -p -v \/home\/zhoukaiwen\/database\/dbSNP\/human_9606_b151_GRCh37p13\/00-common_all.vcf.gz $a[1].txt ..\/align\/$a[0]_bqsr.bam ..\/align\/$a[1]_bqsr.bam &amp;&amp; cp \/home\/zhoukaiwen\/software\/FACETS\/demo.R $a[1].R &amp;&amp; sed -i \\&quot;s\/example\/$a[1]\/\\&quot; $a[1].R &amp;&amp; Rscript $a[1].R &amp;&amp; echo $a[1] facets ok\\n&quot;&#039; ..\/somatic\/sample_pair.txt &gt;facets.sh\n\nls *_cncf.xls|perl -ne &#039;chomp;if($_=~\/(\\S+)\\_cncf.xls\/){print &quot;perl \/home\/zhoukaiwen\/software\/facets_annovar.pl $_ &gt;$1.cnv.annovar &amp;&amp; perl \/home\/zhoukaiwen\/software\/annovar\/annotate_variation.pl -buildver hg19 $1.cnv.annovar \/home\/zhoukaiwen\/software\/annovar\/database\/humandb_hg19 &amp;&amp; perl \/home\/zhoukaiwen\/software\/annovar\/annotate_variation.pl -regionanno -build hg19 -out $1.cnv.annovar -dbtype cytoBand $1.cnv.annovar \/home\/zhoukaiwen\/software\/annovar\/database\/humandb_hg19 &amp;\\n&quot;}&#039; &gt; facets.annovar.sh\n\nnohup .\/facets.annovar.sh &gt; facets.annovar.log 2&gt;&amp;1 &amp;<\/code><\/pre>\n<h2>12. Sequenza \u8ba1\u7b97CNV<\/h2>\n<pre><code class=\"language-bash\"># \u8fd0\u884c\u4e00\u6b21\u5373\u53ef, \u7528\u4e8e\u6784\u5efa hg19.gc50Base.wig.gz \u6216 GRCh37-lite.gc50Base.wig.gz\ncd \/home\/zhoukaiwen\/database\/GRCh37_hg19 \nsequenza-utils gc_wiggle -w 50 --fasta GRCh37-lite.fa -o GRCh37-lite.gc50Base.wig.gz\nsequenza-utils gc_wiggle -w 50 --fasta hg19.fa -o hg19.gc50Base.wig.gz<\/code><\/pre>\n<pre><code class=\"language-bash\">cd ..\/sequenza\nperl -ne &#039;chomp;next if($_=~\/^$\/);my @a=split \/\\t\/;print &quot;sequenza-utils bam2seqz -n ..\/align\/$a[0]_bqsr.bam -t ..\/align\/$a[1]_bqsr.bam -F \/home\/zhoukaiwen\/database\/GRCh37_hg19\/hg19.fa -gc \/home\/zhoukaiwen\/database\/database\/hg19.gc50Base.wig.gz -o $a[1].seqz.gz &amp;&amp; sequenza-utils seqz_binning -s $a[1].seqz.gz -w 50 -o $a[1].small.seqz.gz &amp;&amp; cp \/home\/zhoukaiwen\/software\/sequenza\/demo.R $a[1].R &amp;&amp; sed -i \\&quot;s\/example\/$a[1]\/g\\&quot; $a[1].R &amp;&amp; Rscript $a[1].R &amp;&amp; echo $a[1] sequenza ok\\n&quot;&#039; ..\/somatic\/sample_pair.txt &gt;sequenza.sh\n\nperl -ne &#039;chomp;next if($_=~\/^$\/);my @a=split \/\\t\/;print &quot;sequenza-utils bam2seqz -n ..\/align\/$a[1]_bqsr.bam -t ..\/align\/$a[0]_bqsr.bam -F \/home\/zhoukaiwen\/database\/GRCh37_hg19\/GRCh37-lite.fa -gc \/home\/zhoukaiwen\/database\/GRCh37_hg19\/GRCh37-lite.gc50Base.wig.gz -o $a[0].seqz.gz &amp;&amp; sequenza-utils seqz_binning -s $a[0].seqz.gz -w 50 -o $a[0].small.seqz.gz &amp;&amp; cp \/home\/zhoukaiwen\/software\/sequenza\/demo.R $a[0].R &amp;&amp; sed -i \\&quot;s\/example\/$a[0]\/g\\&quot; $a[0].R &amp;&amp; Rscript $a[0].R &amp;&amp; echo $a[0] sequenza ok\\n&quot;&#039; ..\/somatic\/sample_pair.txt &gt;sequenza.sh\n\nnohup .\/sequenza.sh&gt;sequenza.log 2&gt;&amp;1 &amp;<\/code><\/pre>\n<h2>13. Manta call SV \u7ed3\u6784\u53d8\u5f02<\/h2>\n<pre><code class=\"language-bash\">ls *-T_bqsr.bam|perl -ne &#039;chomp;if($_=~\/(\\S+)\\-T\\_bqsr.bam\/){print &quot;\/home\/zhoukaiwen\/software\/anaconda3\/envs\/manta\/bin\/python2 \/home\/zhoukaiwen\/software\/anaconda3\/envs\/manta\/bin\/configManta.py --normalBam ..\/align\/$1-B_bqsr.bam --tumorBam  ..\/align\/$1-T_bqsr.bam  --referenceFasta \/home\/zhoukaiwen\/database\/GRCh37_hg19\/hg19.fa --runDir .\/$1 --exome\\n&quot;}&#039; &gt; manta.sh\nls *-T_bqsr.bam | perl -ne &#039;chomp;if($_=~\/(\\S+)\\-T\\_bqsr.bam\/){print &quot;\/home\/zhoukaiwen\/software\/anaconda3\/envs\/manta\/bin\/python2 $1\/runWorkflow.py -m local -j 4 1\\&gt;$1\/$1\\_manta.log 2\\&gt;\\&amp;1\\n&quot;}&#039; &gt; ..\/manta\/RunManta.sh\n\n# TCGA-BRCA\nls *_T_bqsr.bam|perl -ne &#039;chomp;if($_=~\/(\\S+)\\_T\\_bqsr.bam\/){print &quot;\/home\/zhoukaiwen\/software\/anaconda3\/envs\/manta\/bin\/python2 \/home\/zhoukaiwen\/software\/anaconda3\/envs\/manta\/bin\/configManta.py --normalBam ..\/align\/$1_N_bqsr.bam --tumorBam  ..\/align\/$1_T_bqsr.bam --referenceFasta \/home\/zhoukaiwen\/database\/GRCh37_hg19\/GRCh37-lite.fa --runDir .\/$1 --exome\\n&quot;}&#039; &gt; manta.sh\nls *_T_bqsr.bam | perl -ne &#039;chomp;if($_=~\/(\\S+)\\_T\\_bqsr.bam\/){print &quot;\/home\/zhoukaiwen\/software\/anaconda3\/envs\/manta\/bin\/python2 $1\/runWorkflow.py -m local -j 4 1\\&gt;$1\/$1\\_manta.log 2\\&gt;\\&amp;1\\n&quot;}&#039; &gt; ..\/manta\/RunManta.sh\n\n\/home\/zhoukaiwen\/software\/anaconda3\/envs\/manta\/bin\/python2 ${tumor}\/runWorkflow.py -m local -j 4 1&gt;${tumor}\/${tumor}_manta.log 2&gt;&amp;1\nnohup \/home\/zhoukaiwen\/software\/anaconda3\/envs\/manta\/bin\/python2 .\/*\/runWorkflow.py &amp;\n\ngunzip .\/*\/results\/variants\/*.gz\n\nfor i in `ls .\/|grep &quot;TCGA&quot;`; do \/home\/zhoukaiwen\/software\/anaconda3\/envs\/manta\/bin\/python2 \/home\/zhoukaiwen\/software\/anaconda3\/envs\/manta\/bin\/convertInversion.py \/home\/zhoukaiwen\/software\/anaconda3\/envs\/samtools1.13\/bin\/samtools \/home\/zhoukaiwen\/database\/GRCh37_hg19\/GRCh37-lite.fa .\/$i\/results\/variants\/somaticSV.vcf\\&gt;.\/somaticSV\/${i}_somatic_SV_convertedInversion.vcf; done\n\nfor i in `ls .\/|grep &quot;IBC&quot;`; do echo \/home\/zhoukaiwen\/software\/anaconda3\/envs\/manta\/bin\/python2 \/home\/zhoukaiwen\/software\/anaconda3\/envs\/manta\/bin\/convertInversion.py \/home\/zhoukaiwen\/software\/anaconda3\/envs\/samtools1.13\/bin\/samtools \/home\/zhoukaiwen\/database\/GRCh37_hg19\/hg19.fa .\/$i\/results\/variants\/somaticSV.vcf\\&gt;.\/somaticSV\/${i}_somatic_SV_convertedInversion.vcf&gt;&gt;convertINV.sh; done\n# \u590d\u5236\u4ee5\u4e0a\u5185\u5bb9\u5e76\u8fd0\u884c\n\nfor i in `ls *_SV_convertedInversion.vcf`; do echo cat $i\\|grep -E \\&quot;#\\|PASS\\&quot;\\&gt;${i%_SV_convertedInversion.vcf}_SV_convertedInversion_filtered.vcf; done<\/code><\/pre>\n<h2>14. polysolver HLA\u57fa\u56e0\u578b\u9884\u6d4b<\/h2>\n<h3>bam \u6587\u4ef6\u67d3\u8272\u4f53\u524d\u5220\u9664 chr, 1.13\u7248\u672csamtools\u624d\u53ef\u4ee5\u7528samtools reheader<\/h3>\n<pre><code class=\"language-bash\">cd ..\/align\nls *_bqsr.bam &gt; sample_list.txt\n\npython\ninput_path = &quot;\/home\/zhoukaiwen\/IBC\/exome\/align\/&quot;\noutput_path = &quot;\/home\/zhoukaiwen\/IBC\/exome\/align\/&quot;\nsample_list = open(&quot;sample_list.txt&quot;, &quot;r&quot;)\nout_file = open(&quot;reheader.sh&quot;, &quot;a&quot;)\nfor lines in sample_list:\n    cutlines = lines.strip().split(&#039;.&#039;)\n    out_file.write(&quot;\/home\/zhoukaiwen\/software\/samtools-1.13\/samtools reheader -c \\&#039;perl -pe \\&quot;s\/^(@SQ.*)(\\\\tSN:)chr\/\\$1\\$2\/\\&quot;\\&#039; &quot; + input_path + cutlines[0] + &quot;.bam &gt; &quot; + output_path + cutlines[0] +&quot;_reheader.bam &amp;&amp; \/home\/zhoukaiwen\/software\/samtools-1.13\/samtools index &quot; + output_path + cutlines[0] +&quot;_reheader.bam &amp;&amp; echo &quot; + cutlines[0] + &quot; ok\\n&quot;)\n\nchmod u+x reheader.sh\nnohup .\/reheader.sh &amp;\n\nls *_bqsr_reheader.bam|perl -ne &#039;chomp;if($_=~\/(\\S+)\\_bqsr\/){print &quot;mkdir $1 &amp;&amp; bash \/home\/polysolver\/scripts\/shell_call_hla_type ..\/align\/$1_bqsr_reheader.bam Asian 1 hg19 STDFQ 0 .\/$1 &amp;&amp; echo $1 ok\\n&quot;}&#039; &gt; ..\/polysolver\/polysolver.sh\n\ndocker exec -it 5e81af527b75 bash\ncd \/Desktop\/home\/zhoukaiwen\/IBC\/exome\/polysolver\/\nnohup .\/polysolver.sh &amp;<\/code><\/pre>\n<h2>15. LOHHLA<\/h2>\n<h3>15.1 \u521b\u5efa\u6587\u4ef6\u5939<\/h3>\n<pre><code class=\"language-bash\">cd lohhla\nmkdir {lohhla_bam,hla,ploidy}<\/code><\/pre>\n<h3>15.2 \u5728lohhla\u6587\u4ef6\u5939\u5185\u624b\u52a8\u521b\u5efa list.txt\uff0c\u7b2c\u4e00\u5217\u4e3a\u75c5\u4eba\u53f7\uff0c\u7b2c\u4e8c\u5217\u4e3a\u75c5\u4eba\u6b63\u5e38\u53f7\uff0c\u5982<\/h3>\n<pre><code>IBC01 IBC01-B\nIBC02 IBC02-B\nIBC03 IBC03-B<\/code><\/pre>\n<h3>15.3 \u66f4\u6362\u6587\u4ef6\u540d\uff0cLOHHLA\u8981\u6c42\u6587\u4ef6\u540d\u4e3asample id + .bam\uff0c\u5c06\u66f4\u6362\u4e86\u6587\u4ef6\u540d\u7684\u6587\u4ef6\u5b58\u5165align\/lohhla_bam\u6587\u4ef6\u5939<\/h3>\n<pre><code class=\"language-bash\">cd ..\/align\nfor name in `ls *_bqsr.bam`;do nohup cp $name ..\/lohhla\/lohhla_bam\/${name%_bqsr.bam}.bam &amp;;done\ncd ..\/lohhla\/lohhla_bam\nfor name in `ls IBC0???.bam`;do nohup samtools index $name &gt;$name.log 2&gt;&amp;1 &amp;;done\nfor name in `ls IBC0??B.bam`; do nohup mkdir ${name%%-*} &amp;&amp; mv ${name%%-*}* ${name%%-*} &amp;; done<\/code><\/pre>\n<h3>15.4 \u66f4\u6539 polysolver \u91cc winners.hla.txt \u6587\u4ef6\u7684\u683c\u5f0f<\/h3>\n<pre><code class=\"language-bash\">cd ..\/polysolver\nls |grep &quot;IBC&quot;&gt;id.txt<\/code><\/pre>\n<pre><code class=\"language-python\"># python\npwd = &quot;\/home\/zhoukaiwen\/IBC\/exome\/polysolver\/&quot;\nid_list = open(pwd+&quot;id.txt&quot;,&quot;r&quot;)\n\nfor lines in id_list:\n    lines=lines.strip()\n    input_file = open(pwd+lines+&quot;\/&quot;+&quot;winners.hla.txt&quot;,&quot;r&quot;)\n    output_file = open(pwd+lines+&quot;\/&quot;+&quot;winners2.hla.txt&quot;,&quot;a&quot;)\n    for lines2 in input_file:\n        cutlines2 = lines2.strip().split(&#039;\\t&#039;)\n        output_file.write(cutlines2[1] + &#039;\\n&#039; + cutlines2[2] + &#039;\\n&#039;)<\/code><\/pre>\n<h3>15.5 \u53d6\u6b63\u5e38\u7ec4\u7ec7\u7684 winners2.hla.txt\u4f5c\u4e3a\u8be5\u75c5\u4eba\u7684hla\uff08\u5f85\u786e\u8ba4\uff09\uff0c\u62f7\u8d1d\u81f3 ..\/lohhla\/hla\/ \u6587\u4ef6\u5939\u4e0b<\/h3>\n<pre><code class=\"language-bash\">for name in `ls -d IBC0??B`; do cp $name\/winners2.hla.txt ..\/lohhla\/hla\/${name%%-B}.hla.txt; done<\/code><\/pre>\n<h3>15.6 \u4ece facets \u7ed3\u679c\u4e2d\u63d0\u53d6\u80bf\u7624\u7eaf\u5ea6\u548c\u500d\u6027\u4fe1\u606f<\/h3>\n<pre><code class=\"language-bash\">cd ..\/facets\nls *_info.xls&gt; info_list.txt<\/code><\/pre>\n<pre><code class=\"language-python\"># python\npwd = &quot;\/home\/zhoukaiwen\/IBC\/exome\/facets\/&quot;\ninfo_list = open(&quot;\/home\/zhoukaiwen\/IBC\/exome\/facets\/info_list.txt&quot;,&quot;r&quot;)\n\nfor lines in info_list:\n    lines=lines.strip()\n    input_file = open(pwd+lines,&quot;r&quot;)\n    cutlines = lines.strip().split(&#039;_&#039;)\n    output_file = open(pwd + cutlines[0] + &quot;_ploidy.txt&quot;, &quot;a&quot;)\n    output_file.write(&quot;Ploidy&quot; + &#039;\\t&#039; + &quot;tumorPurity&quot;+&#039;\\t&#039;+&quot;tumorPloidy\\n&quot;)\n    for lines2 in input_file:\n        cutlines2 = lines2.strip().split(&#039;\\t&#039;)\n        if &quot;x&quot; in lines2:\n            pass\n        elif &quot;\\&quot;1\\&quot;&quot; in lines2 :\n            purity = cutlines2[1]\n        elif &quot;\\&quot;2\\&quot;&quot; in lines2 :\n            ploidy = cutlines2[1]\n    output_file.write(cutlines[0] + &#039;\\t2\\t&#039; + purity + &#039;\\t&#039; + ploidy + &#039;\\n&#039;)<\/code><\/pre>\n<h3>15.7 \u5c06\u76f8\u540c\u75c5\u4eba\u7684\u7eaf\u5ea6\u4e0e\u500d\u6027\u4fe1\u606f\u5b58\u5165\u4e00\u4e2atxt\uff0c\u653e\u5728 ..\/lohhla\/ploidy \u6587\u4ef6\u5939\u4e0b<\/h3>\n<pre><code class=\"language-bash\">for name in `ls IBC0??S.R`;do cat ${name%%-S.R}-S_ploidy.txt|grep &quot;Ploidy&quot;&gt;..\/lohhla\/ploidy\/${name%%-S.R}_ploidy.txt &amp;&amp; cat ${name%%-S.R}*_ploidy.txt|grep -v &quot;Ploidy&quot;&gt;&gt;..\/lohhla\/ploidy\/${name%%-S.R}_ploidy.txt;done<\/code><\/pre>\n<p>\u6ce8\u610f\u82e5\u6b64\u5904\u80bf\u7624\u7eaf\u5ea6\u4e3aNA \u5219\u65e0\u6cd5\u7b97\u51faHLA_type1copyNum_withBAFBin \u548c HLA_type2copyNum_withBAFBin\uff0c\u82e5\u65e0\u6cd5\u7b97\u51fa\u80bf\u7624\u7eaf\u5ea6\u4fe1\u606f\uff08\u4e3aNA\uff09\uff0c\u5219\u75280.2\u4ee3\u66ff\uff0c\u4e5f\u53ef\u770b sequenza \u4e2d\u7684 _alternative_solutions.txt \u4e2d\u7684 cellularity<\/p>\n<h3>15.8 \u6587\u4ef6\u51c6\u5907\u5b8c\u6210\uff0c\u8fdb\u884cLOHHLA<\/h3>\n<pre><code class=\"language-bash\">cd ..\/lohhla\nconda activate lohhla\n\nperl -ne &#039;chomp;next if($_=~\/^$\/);my @a=split \/\\t\/;print &quot;lohhla --patientId $a[0] --outputDir \/home\/zhoukaiwen\/IBC\/exome\/lohhla\/$a[0]\/ --BAMDir \/home\/zhoukaiwen\/IBC\/exome\/lohhla\/lohhla_bam\/$a[0]\/ --normalBAMfile \/home\/zhoukaiwen\/IBC\/exome\/lohhla\/lohhla_bam\/$a[0]\/$a[1].bam --hlaPath \/home\/zhoukaiwen\/IBC\/exome\/lohhla\/hla\/$a[0].hla.txt --HLAfastaLoc \/home\/zhoukaiwen\/database\/LOHHLA\/data\/abc_complete.fasta --CopyNumLoc \/home\/zhoukaiwen\/IBC\/exome\/lohhla\/ploidy\/$a[0]_ploidy.txt --mappingStep TRUE --minCoverageFilter 10 --fishingStep TRUE --cleanUp FALSE --gatkDir \/home\/zhoukaiwen\/software\/picard-tools-1.119\/ --novoDir \/home\/zhoukaiwen\/software\/novocraftV3.08.01 &amp;&amp; echo $a[0] lohhla ok\\n&quot;&#039; list.txt &gt;lohhla.sh<\/code><\/pre>\n<p>HLA_type1copyNum_withBAFBin \u6216 HLA_type2copyNum_withBAFBin&lt;0.5<br \/>\nPVal_unique &lt; 0.05<br \/>\n\u624d\u80fd\u786e\u8ba4\u4e3aLossAllele<\/p>\n<h2>16. \u901a\u8fc7 docker \u4f7f\u7528 pTuneos \u65b0\u6297\u539f\u9884\u6d4b (\u6709bug\uff0cbwa\u8def\u5f84\u65e0\u6cd5\u8bbe\u7f6e\uff0c\u8df3\u8fc7)<\/h2>\n<pre><code class=\"language-bash\">docker exec -it 57e0b0fe505c bash<\/code><\/pre>\n<h2>17. netMHCpan \u65b0\u6297\u539f\u9884\u6d4b\uff08\u9700\u5b66\u4e60\u5982\u4f55\u7528 HGVSc \u63d0\u53d6\u6c28\u57fa\u9178\u5e8f\u5217\uff09<\/h2>\n<p>\u5c06 maf \u6587\u4ef6\u4e2d\u7684 RefSeq \u5bfc\u5165 <a href=\"https:\/\/www.uniprot.org\/uploadlists\/\">https:\/\/www.uniprot.org\/uploadlists\/<\/a><br \/>\nselect options \u91cc \u8bbe\u7f6eRefSeq Nucleotide To UniProtKB\uff0c\u4e0b\u8f7d\u8f93\u51fa\u6587\u4ef6\uff08fasta\u683c\u5f0f\uff09\uff0c\u5728\u6b64\u6587\u4ef6\u4e2d\u6293\u53d6HGVSc\u7684\u7a81\u53d8\uff0c\u4e4b\u540e\u5c06\u8fd9\u4e9b\u6293\u53d6\u51fa\u6765\u7684\u6c28\u57fa\u9178\u5bfc\u5165\u7f51\u9875\u7248 netMHCpan\uff08<a href=\"http:\/\/www.cbs.dtu.dk\/services\/NetMHCpan\/\uff09\">http:\/\/www.cbs.dtu.dk\/services\/NetMHCpan\/\uff09<\/a> \u6216 \u672c\u5730 netMHCpan<\/p>\n<h2>18. NeoPredPipe\u9884\u6d4b\u65b0\u6297\u539f<\/h2>\n<h3>18.1 \u66f4\u6539 polysolver \u91cc winners.hla.txt \u6587\u4ef6\u7684\u683c\u5f0f<\/h3>\n<pre><code class=\"language-bash\">cd ..\/polysolver\nls |grep &quot;IBC&quot;&gt;id.txt<\/code><\/pre>\n<pre><code class=\"language-python\">python\npwd = &quot;\/home\/zhoukaiwen\/IBC\/exome\/polysolver\/&quot;\nid_list = open(pwd+&quot;id.txt&quot;,&quot;r&quot;)\n\nfor lines in id_list:\n    lines=lines.strip()\n    input_file = open(pwd+lines+&quot;\/&quot;+&quot;winners.hla.txt&quot;,&quot;r&quot;)\n    output_file = open(pwd+lines+&quot;\/&quot;+&quot;winners3.hla.txt&quot;,&quot;a&quot;)\n    output_file.write(lines + &#039;\\t&#039;)\n    for lines2 in input_file:\n        cutlines2 = lines2.strip().split(&#039;\\t&#039;)\n        output_file.write(cutlines2[1] + &#039;\\t&#039; + cutlines2[2] + &#039;\\t&#039;)\n    output_file.write(&#039;\\n&#039;)<\/code><\/pre>\n<p>winners3.hla.txt\u4f5c\u4e3a\u8be5\u75c5\u4eba\u7684hla\uff08\u5f85\u786e\u8ba4\uff09\uff0c\u62f7\u8d1d\u81f3 ..\/lohhla\/hla\/ \u6587\u4ef6\u5939\u4e0b<\/p>\n<pre><code class=\"language-bash\">for name in `ls -d IBC0???`;do cp $name\/winners3.hla.txt ..\/neopredpipe\/hla\/$name.hla.txt; done<\/code><\/pre>\n<h3>18.2 \u5c06somatic\/vcf\u6587\u4ef6\u62f7\u8d1d\u81f3neopredpipe\/vcf\u76ee\u5f55\u4e0b\uff0c\u6ce8\u610f\u547d\u540d\u9700\u8981\u548c\u4e0a\u4e00\u6b65hla\u6587\u4ef6\u4e2d\u7684\u75c5\u4eba\u540d\u4e00\u81f4<\/h3>\n<pre><code class=\"language-bash\">cd neopredpipe &amp;&amp; mkdir {vcf,hla,out}\ncd vcf\nfor name in `ls *.somatic.vcf`;do mkdir ..\/neopredpipe\/vcf\/$name &amp;&amp; cp $name ..\/neopredpipe\/vcf\/$name\/${name%%.somatic.vcf}.vcf; done\n\nls -d *&gt;list.txt\n\nls -d *|perl -ne &#039;chomp;print &quot;python NeoPredPipe.py -I \/Desktop\/home\/zhoukaiwen\/IBC\/exome\/neopredpipe\/vcf\/$1\/ -H \/Desktop\/home\/zhoukaiwen\/IBC\/exome\/neopredpipe\/hla\/$1.hla.txt -o out\/$1 -n $1 &amp;&amp; echo $1 ok\\n&quot;&#039; &gt;..\/NeoPredPipe.sh\n\nperl -ne &#039;chomp;next if($_=~\/^$\/);my @a=split \/\\n\/;print &quot;python NeoPredPipe.py -I \/Desktop\/home\/zhoukaiwen\/IBC\/exome\/neopredpipe\/vcf\/$0\/ -H \/Desktop\/home\/zhoukaiwen\/IBC\/exome\/neopredpipe\/hla\/$0.hla.txt -o out\/$0 -n $0 &amp;&amp; echo $0 ok\\n&quot;&#039; list.txt &gt; ..\/NeoPredPipe.sh\n\nperl -ne &#039;chomp;next if($_=~\/^$\/);my @a=split \/\\t\/;print &quot;$0&quot;&#039; list.txt\n\npython NeoPredPipe.py -I \/Desktop\/NeoPredPipe_test\/ -H \/Desktop\/polysolver_test_out\/Neopredpipe_HLAfile.txt -o \/Neopredpipe_out\/ -n test<\/code><\/pre>\n<h2>19. HAPSEG + ABSOLUTE \u9884\u6d4b\u80bf\u7624\u7eaf\u5ea6&amp;\u500d\u6027<\/h2>\n<pre><code class=\"language-R\"># R\nlibrary(ABSOLUTE)<\/code><\/pre>\n<h2>WES call germ line mutation<\/h2>\n<h3>\u63a5\u4e0a\u7b2c6\u6b65<\/h3>\n<h3>7. \u5728 GVCF \u6a21\u5f0f\u4e0b\u5bf9\u6bcf\u4e2a\u6837\u672c call \u7a81\u53d8\uff1aHaplotypeCaller<\/h3>\n<pre><code class=\"language-bash\">ls *_bqsr.bam|perl -ne &#039;chomp;if($_=~\/(\\S+)\\_bqsr\/){print &quot;java -Xmx20G -jar \/home\/zhoukaiwen\/software\/gatk-4.1.2.0\/gatk-package-4.1.2.0-local.jar HaplotypeCaller -R \/home\/zhoukaiwen\/database\/GRCh37_hg19\/hg19.fa -ERC GVCF -I ..\/align\/$1_bqsr.bam -O ..\/germline\/$1_HaplotypeCaller.vcf --intervals \/home\/zhoukaiwen\/database\/database\/Agilent_V6.interval_list &amp;&amp; echo $1 germline ok\\n&quot;}&#039; &gt; ..\/germline\/HaplotypeCaller.sh\n\nnohup java -Xmx20G -jar \/home\/zhoukaiwen\/software\/gatk-4.1.2.0\/gatk-package-4.1.2.0-local.jar GenomicsDBImport -R \/home\/zhoukaiwen\/database\/GRCh37_hg19\/hg19.fa $(ls *.vcf | awk &#039;{print &quot;-V &quot;$0&quot; &quot;}&#039;) -L \/home\/zhoukaiwen\/database\/database\/Agilent_V6.interval_list --merge-input-intervals TRUE --genomicsdb-workspace-path .\/gvcfs_db &gt; GenomicsDBImport.log 2&gt;&amp;1 &amp; \n#\u6240\u6709\u6837\u672c\u5408\u4e3a1\u4e2a\uff0c\u5e94\u5206\u75c5\u4eba\u5408\u6210\n\n#\u624b\u52a8\u4fee\u6539\u4e3a\u75c5\u4eba\u7f16\u53f7\nls *_HaplotypeCaller.vcf|perl -ne &#039;chomp;if($_=~\/(\\S+)\\_HaplotypeCaller.vcf\/){print &quot;$1\\n&quot;}&#039;&gt;sample_list.txt \n\nls *_HaplotypeCaller.vcf|perl -ne &#039;chomp;if($_=~\/(\\S+)\\_HaplotypeCaller.vcf\/){print &quot;java -Xmx20G -jar \/home\/zhoukaiwen\/software\/gatk-4.1.2.0\/gatk-package-4.1.2.0-local.jar GenomicsDBImport -R \/home\/zhoukaiwen\/database\/GRCh37_hg19\/hg19.fa ..\/align\/$0.vcf -L \/home\/zhoukaiwen\/database\/database\/Agilent_V6.interval_list --merge-input-intervals TRUE --genomicsdb-workspace-path .\/gvcfs_db&amp;&amp; echo $1 germline ok\\n&quot;}&#039; &gt; ..\/germline\/GenomicsDBImport.sh\n\nperl -ne &#039;chomp;next if($_=~\/^$\/);my @a=split \/\\t\/;print &quot;java -Xmx20G -jar \/home\/zhoukaiwen\/software\/gatk-4.1.2.0\/gatk-package-4.1.2.0-local.jar GenomicsDBImport -R \/home\/zhoukaiwen\/database\/GRCh37_hg19\/hg19.fa -V ..\/germline\/$a[0]-B_HaplotypeCaller.vcf -V ..\/germline\/$a[0]-S_HaplotypeCaller.vcf -V ..\/germline\/$a[0]-T_HaplotypeCaller.vcf -L \/home\/zhoukaiwen\/database\/database\/Agilent_V6.interval_list --merge-input-intervals TRUE --genomicsdb-workspace-path .\/$a[0]_gvcfs_db &amp;&amp; echo $a[0] GenomicsDBImport ok\\n&quot;&#039; sample_list.txt &gt;GenomicsDBImport.sh\n\nnohup .\/GenomicsDBImport.sh &gt; GenomicsDBImport.log 2&gt;&amp;1 &amp;\n\nperl -ne &#039;chomp;next if($_=~\/^$\/);my @a=split \/\\t\/;print &quot;java -Xmx20G -jar \/home\/zhoukaiwen\/software\/gatk-4.1.2.0\/gatk-package-4.1.2.0-local.jar GenotypeGVCFs -R \/home\/zhoukaiwen\/database\/GRCh37_hg19\/hg19.fa -V gendb:\/\/$a[0]_gvcfs_db -O $a[0]_germline_merged.vcf &amp;&amp; echo $a[0] GenotypeGVCFs ok\\n&quot;&#039; sample_list.txt &gt;GenotypeGVCFs.sh\n\nnohup .\/GenotypeGVCFs.sh &gt; GenotypeGVCFs.log 2&gt;&amp;1 &amp;<\/code><\/pre>\n<h1>RNA-seq \u8f6c\u5f55\u7ec4\u6d4b\u5e8f\u5206\u6790\u6d41\u7a0b<\/h1>\n<h2>1. \u521b\u5efa\u6587\u4ef6\u5939<\/h2>\n<pre><code class=\"language-bash\">mkdir {rawdata, align, rnaseqc, fusion}<\/code><\/pre>\n<h2>2. \u6570\u636e\u89e3\u538b<\/h2>\n<pre><code class=\"language-bash\">#!\/bin\/bash\nfor file in `ls \/Bulk\/Long\/GSE_data\/GSE50760\/*.1`;do\nfasterq-dump --split-3 $file -O \/Bulk\/metastasis\/GSE50760\/rawdata\ndone<\/code><\/pre>\n<h2>3. \u6570\u636e\u8d28\u63a7<\/h2>\n<pre><code class=\"language-bash\">cd \/Bulk\/metastasis\/SRP034161\/rawdata\nls *.fq.gz|xargs fastqc -t 4 -o .\/\nmultiqc *.zip<\/code><\/pre>\n<h2>4. \u5e8f\u5217\u6bd4\u5bf9<\/h2>\n<pre><code class=\"language-bash\"># hisat2 \u6784\u5efa\u53c2\u8003\u57fa\u56e0\u7ec4\u7d22\u5f15\nhisat2-build .\/hg38.fa hg38\n# hisat2 \u526a\u63a5\u4f4d\u70b9\u4fe1\u606f\u7d22\u5f15\u6dfb\u52a0\nhisat2_extract_splice_sites.py .\/gencode.v46.annotation.gtf &gt; gencode.v46.annotation.splicesites\n\ncd \/data\/user\/heminghui\/project\/hepatoblastoma\/multifocal\/mRNA\/align\n# \u53cc\u7aef\uff1a\nls ..\/rawdata\/*_R1.fq.gz|perl -ne &#039;chomp;my $a=$_;$a=~s\/\\_R1\/\\_R2\/;if($_=~\/\\\/([^\\\/]+)\\_R1\/){print &quot;\/home\/zhoukaiwen\/software\/hisat2-2.1.0\/hisat2 -p 10 -x \/home\/zhoukaiwen\/database\/database\/hg19\/hg19 --known-splicesite-infile \/home\/zhoukaiwen\/database\/database\/gencode.v31lift37.annotation.splicesites -1 $_ -2 $a --rg-id $1 --rg SM:$1 | samtools view -bS - &gt;$1.bam &amp;&amp; samtools sort -@ 5 -o $1.sort.bam $1.bam &amp;&amp; samtools index $1.sort.bam &amp;&amp; echo $1 align ok\\n&quot;}&#039; &gt;aln.sh\nchmod u+x aln.sh\nnohup \/data\/user\/heminghui\/bin\/parallel_run.pl aln.sh &amp;<\/code><\/pre>\n<h2>5. rnaseqc \u751f\u6210\u8868\u8fbe\u77e9\u9635<\/h2>\n<pre><code class=\"language-bash\">cd \/data\/user\/heminghui\/project\/hepatoblastoma\/multifocal\/mRNA\/rnaseqc\n# \u751f\u6210rpkm\u8868\u8fbe\u77e9\u9635\nls ..\/align\/*.sort.bam|perl -ne &#039;chomp;if($_=~\/\\\/([^\\\/]+)\\.sort\/){print &quot;\/home\/zhoukaiwen\/software\/rnaseqc \/home\/zhoukaiwen\/database\/database\/gencode.v31lift37.gene.gtf $_ .\/ -s $1 -q 1 -u --rpkm --coverage &amp;&amp; echo $1 ok\\n&quot;}&#039; &gt;rnaseqc_rpkm.sh\n# \u751f\u6210tpm\u8868\u8fbe\u77e9\u9635\nls ..\/align\/*.sort.bam|perl -ne &#039;chomp;if($_=~\/\\\/([^\\\/]+)\\.sort\/){print &quot;\/home\/zhoukaiwen\/software\/rnaseqc \/home\/zhoukaiwen\/database\/database\/gencode.v31lift37.gene.gtf $_ .\/ -s $1 -q 1 -u --coverage &amp;&amp; echo $1 ok\\n&quot;}&#039; &gt;rnaseqc_tpm.sh\n\nnohup .\/rnaseqc_tpm.sh&gt;rnaseqc_tpm.log 2&gt;&amp;1 &amp;\n\n# lncRNA (\u4e0d\u786e\u5b9a\u662f\u4e0d\u662f\u8fd9\u4e48\u8dd1)\nls ..\/lnc_align\/*.sort.bam|perl -ne &#039;chomp;if($_=~\/\\\/([^\\\/]+)\\.sort\/){print &quot;\/home\/zhoukaiwen\/software\/rnaseqc \/home\/zhoukaiwen\/database\/GRCh37_hg19\/gencode.v39lift37.long_noncoding_RNAs.gtf $_ .\/ -s $1 -q 1 -u --rpkm --coverage &amp;&amp; echo $1 ok\\n&quot;}&#039; &gt;rnaseqc_rpkm.sh<\/code><\/pre>\n<h3>rpkm\u6587\u4ef6\u5408\u5e76\uff08python\uff09\uff0c\u5408\u5e76\u540e\u53ef\u4ee5\u7528\u4e8e\u8dd1CIBERSORT<\/h3>\n<pre><code class=\"language-python\">import glob\nfile_list = glob.glob(&quot;\/home\/zhoukaiwen\/IBC\/RNA\/rnaseqc\/*.gene_rpkm.gct&quot;)\nf_out_combined = open(&quot;\/home\/zhoukaiwen\/IBC\/RNA\/rnaseqc\/rpkm_merged.txt&quot;, &quot;a&quot;)\nsample_list = []\ngene_list = []\nall_dict = {}\nfor file in file_list:\n    f = open(file.strip())\n    for lines in f:\n        cutlines = lines.strip().split(&quot;\\t&quot;)\n        if &quot;#1.2&quot; in lines:  # \u53bb\u9664\u5f00\u5934\u7684\u7248\u672c\u7b26\u53f7#1.2\n            pass\n        elif &quot;59953&quot; in cutlines[0]:  # \u53bb\u9664\u7b2c\u4e8c\u884c\u7684\u4e24\u4e2a\u6570\u5b57\uff0c\u5e94\u8be5\u662f\u57fa\u56e0\u6570\u548c\u6837\u672c\u6570\n            pass\n        elif &quot;Name&quot; in lines:\n            sample = cutlines[2]\n            sample_list.append(sample)\n        else:\n            if cutlines[1] in gene_list:\n                all_dict[cutlines[1]][sample] = cutlines[2]\n            else:\n                gene_list.append(cutlines[1])\n                all_dict[cutlines[1]] = {}\n                all_dict[cutlines[1]][sample] = cutlines[2]\nsample_list.sort()\nheading_sample = &quot;\\t&quot;.join(sample_list)\nf_out_combined.write(&quot;Gene&quot; + &quot;\\t&quot; + heading_sample + &quot;\\n&quot;)\nfor n in gene_list:\n    f_out_combined.write(n + &quot;\\t&quot;)   \n    for i in range(len(sample_list)):\n            f_out_combined.write(str(all_dict[n][sample_list[i]]) + &quot;\\t&quot;)\n    f_out_combined.write(&quot;\\n&quot;)<\/code><\/pre>\n<h3>reads\u6587\u4ef6\u5408\u5e76\uff0c\u7528\u4e8e\u4e0b\u6e38\u5206\u6790<\/h3>\n<pre><code class=\"language-python\">import glob\nfile_list = glob.glob(&quot;\/IBC\/RNA\/rnaseqc\/*.gene_reads.gct&quot;)\nf_out_combined = open(&quot;\/IBC\/RNA\/rnaseqc\/reads_merged.txt&quot;, &quot;a&quot;)\n\nsample_list = []\ngene_list = []\nall_dict = {}\n\nfor file in file_list:\n    f = open(file.strip())\n    for lines in f:\n        cutlines = lines.strip().split(&quot;\\t&quot;)\n        if &quot;#1.2&quot; in lines:  # \u53bb\u9664\u5f00\u5934\u7684\u7248\u672c\u7b26\u53f7#1.2\n            pass\n        elif &quot;59953&quot; in cutlines[0]:  # \u53bb\u9664\u7b2c\u4e8c\u884c\u7684\u4e24\u4e2a\u6570\u5b57\uff0c\u5e94\u8be5\u662f\u57fa\u56e0\u6570\u548c\u6837\u672c\u6570\n            pass\n        elif &quot;Name&quot; in lines:\n            sample = cutlines[2]\n            sample_list.append(sample)\n        else:\n            if cutlines[1] in gene_list:\n                all_dict[cutlines[1]][sample] = cutlines[2]\n            else:\n                gene_list.append(cutlines[1])\n                all_dict[cutlines[1]] = {}\n                all_dict[cutlines[1]][sample] = cutlines[2]\n\nsample_list.sort()\nheading_sample = &quot;\\t&quot;.join(sample_list)\n\nf_out_combined.write(&quot;Gene&quot; + &quot;\\t&quot; + heading_sample + &quot;\\n&quot;)\n\nfor n in gene_list:\n    f_out_combined.write(n + &quot;\\t&quot;)   \n    for i in range(len(sample_list)):\n            f_out_combined.write(str(all_dict[n][sample_list[i]]) + &quot;\\t&quot;)\n    f_out_combined.write(&quot;\\n&quot;)<\/code><\/pre>\n<h3>count\u3001rpkm\/fpkm\u3001tpm \u8f6c\u6362\uff08R\u8bed\u8a00\u4ee3\u7801\uff09<\/h3>\n<pre><code class=\"language-R\">countToTpm &lt;- function(counts, effLen)\n{\n    rate &lt;- log(counts) - log(effLen)\n    denom &lt;- log(sum(exp(rate)))\n    exp(rate - denom + log(1e6))\n}\n\ncountToFpkm &lt;- function(counts, effLen)\n{\n    N &lt;- sum(counts)\n    exp( log(counts) + log(1e9) - log(effLen) - log(N) )\n}\n\nfpkmToTpm &lt;- function(fpkm)\n{\n    exp(log(fpkm) - log(sum(fpkm)) + log(1e6))\n}\n\neg.#fpkmToTpm\nexpMatrix&lt;-read.table(&quot;fpkm_expr.txt&quot;,header = T,row.names = 1)\ntpms &lt;- apply(expMatrix,2,fpkmToTpm)\ntpms[1:3,]\n#\u6700\u540e\u53ef\u4ee5\u6839\u636eTPM\u7684\u7279\u5f81\u8fdb\u884c\u68c0\u67e5\uff0c\u770b\u6bcf\u5217\u52a0\u548c\u662f\u5426\u4e00\u81f4\ncolSums(tpms)<\/code><\/pre>\n<h2>6. starfusion \u878d\u5408\u57fa\u56e0\u5206\u6790<\/h2>\n<pre><code class=\"language-bash\">cd \/data\/user\/heminghui\/project\/hepatoblastoma\/multifocal\/mRNA\/fusion\nexport PERL5LIB=&quot;\/usr\/local\/lib64\/perl5\/&quot;\nls ..\/rawdata\/*_R1.fq.gz|perl -ne &#039;chomp;my $a=$_;$a=~s\/\\_R1\/\\_R2\/;if($_=~\/\\\/([^\\\/]+)\\_R1\/){print &quot;\/home\/zhoukaiwen\/software\/STAR-Fusion.v1.10.1\/STAR-Fusion --genome_lib_dir \/home\/zhoukaiwen\/database\/star-fusion-index\/GRCh37_gencode_v19_CTAT_lib_Mar012021.plug-n-play\/ctat_genome_lib_build_dir --left_fq $_ --right_fq $a --CPU 10 --output_dir $1 &amp;&amp; echo $1 fusion ok\\n&quot;}&#039; &gt;fusion.sh\nnohup \/data\/user\/heminghui\/bin\/parallel_run.pl fusion.sh &amp;\n\nconda activate starfusion\nls ..\/rawdata\/*_R1.fq.gz|perl -ne &#039;chomp;my $a=$_;$a=~s\/\\_R1\/\\_R2\/;if($_=~\/\\\/([^\\\/]+)\\_R1\/){print &quot;STAR-Fusion --genome_lib_dir \/home\/zhoukaiwen\/database\/star-fusion-index\/GRCh37_gencode_v19_CTAT_lib_Mar012021.plug-n-play\/ctat_genome_lib_build_dir --left_fq $_ --right_fq $a --CPU 10 --output_dir $1 &amp;&amp; echo $1 fusion ok\\n&quot;}&#039; &gt;fusion.sh<\/code><\/pre>\n<h3>\u6279\u91cf\u66f4\u6539\u7ed3\u679c\u6587\u4ef6\u540d<\/h3>\n<pre><code class=\"language-bash\">for n in `ls .\/*\/*.tsv`;do cp $n ${n%\/*}_${n##*\/};done<\/code><\/pre>\n<h2>bam \u6587\u4ef6\u6dfb\u52a0\/\u5220\u9664 Chr\/chr 1.13\u7248\u672csamtools\u624d\u53ef\u4ee5\u7528samtools reheader<\/h2>\n<pre><code class=\"language-bash\"># Remove comment lines\nsamtools reheader -c &#039;grep -v ^@CO&#039; in.bam\n\n# Add \u201cChr\u201d prefix to chromosome names. Note extra backslashes before dollar signs to prevent unwanted shell variable expansion.\nsamtools reheader -c &#039;perl -pe &quot;s\/^(@SQ.*)(\\tSN:)(\\d+|X|Y|MT)(\\s|\\$)\/\\$1Chr\\$2\\$3\/&quot;&#039; in.bam\n\n# Remove \u201cChr\u201d prefix\nsamtools reheader -c &#039;perl -pe &quot;s\/^(@SQ.*)(\\tSN:)Chr\/\\$1\\$2\/&quot;&#039; in.bam\n\nfor i in `ls *.sort.markdup.bam`;do samtools reheader -c &#039;perl -pe &quot;s\/^(@SQ.*)(\\tSN:)(\\d+|X|Y|MT)(\\s|\\$)\/\\$1chr\\$2\\$3\/&quot;&#039; $i;done\n\nsamtools index *.sort.markdup.bam<\/code><\/pre>\n","protected":false},"excerpt":{"rendered":"<p>\u5185\u5bb9\u94fe\u63a5 scRNA-seq \u5355\u7ec6\u80de\u5206\u6790\u6d41\u7a0b Exome-seq \u5916\u663e\u5b50\u6d4b\u5e8f\u5206\u6790\u6d41\u7a0b RNA-seq \u8f6c\u5f55\u7ec4\u6d4b&#8230;<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1],"tags":[],"class_list":["post-63","post","type-post","status-publish","format-standard","hentry","category-uncategorized"],"_links":{"self":[{"href":"https:\/\/www.kz-hub.tech\/index.php\/wp-json\/wp\/v2\/posts\/63","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.kz-hub.tech\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.kz-hub.tech\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.kz-hub.tech\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.kz-hub.tech\/index.php\/wp-json\/wp\/v2\/comments?post=63"}],"version-history":[{"count":7,"href":"https:\/\/www.kz-hub.tech\/index.php\/wp-json\/wp\/v2\/posts\/63\/revisions"}],"predecessor-version":[{"id":183,"href":"https:\/\/www.kz-hub.tech\/index.php\/wp-json\/wp\/v2\/posts\/63\/revisions\/183"}],"wp:attachment":[{"href":"https:\/\/www.kz-hub.tech\/index.php\/wp-json\/wp\/v2\/media?parent=63"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.kz-hub.tech\/index.php\/wp-json\/wp\/v2\/categories?post=63"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.kz-hub.tech\/index.php\/wp-json\/wp\/v2\/tags?post=63"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}