{"id":390,"date":"2025-01-08T16:53:29","date_gmt":"2025-01-08T08:53:29","guid":{"rendered":"https:\/\/www.kz-hub.tech\/?p=390"},"modified":"2025-01-17T15:30:38","modified_gmt":"2025-01-17T07:30:38","slug":"biscut","status":"publish","type":"post","link":"https:\/\/www.kz-hub.tech\/index.php\/2025\/01\/08\/biscut\/","title":{"rendered":"\u4ece\u96f6\u5f00\u59cb\u7684BISCUT+\u63a8\u65ad\u4f53\u7cfbCNA\u7684fitness(\u9002\u5e94\u5ea6)"},"content":{"rendered":"<ul>\n<li><a href=\"https:\/\/github.com\/beroukhim-lab\/BISCUT-py3\">Github\u7f51\u5740<\/a><\/li>\n<\/ul>\n<h2>\u5b89\u88c5<\/h2>\n<pre><code class=\"language-bash\">conda create -n biscut\nconda activate biscut\n\n# BISCUT \u5fc5\u987b\u8981\u7528\u7279\u5b9a\u7248\u672c\u7684R\u548cPython\nconda install conda-forge::r-base=4.1.2\nconda install conda-forge::python=3.9.7\n\n# \u5b89\u88c5\u6240\u9700python\u5305\npip install pandas -i https:\/\/pypi.tuna.tsinghua.edu.cn\/simple\/\npip install numpy -i https:\/\/pypi.tuna.tsinghua.edu.cn\/simple\/\n# multiprocessing\u3001operator\u3001itertools\u3001fnmatch \u5747\u4e3aPython\u6807\u51c6\u5e93\u6a21\u5757\uff0c\u65e0\u9700\u901a\u8fc7pip\u5b89\u88c5\n\n# \u5b89\u88c5\u6240\u9700R\u5305\uff08\u4ee5\u4e0b\u547d\u4ee4\u5728R\u4e2d\u8fd0\u884c\uff09\ninstall.packages(c(&quot;ismev&quot;,&quot;extRemes&quot;,&quot;fitdistrplus&quot;,&quot;truncdist&quot;,&quot;segmented&quot;,&quot;dplyr&quot;,&quot;reticulate&quot;,&quot;foreach&quot;,&quot;doParallel&quot;,&quot;pastecs&quot;,&quot;ismev&quot;,&quot;ggplot2&quot;,&quot;fitdistrplus&quot;,&quot;gridExtra&quot;,&quot;stringr&quot;,&quot;gtable&quot;,&quot;cowplot&quot;,&quot;BiocManager&quot;))\n# 
\u4f7f\u7528\u897f\u6e56\u5927\u5b66\u955c\u50cf\u52a0\u901f\noptions(BioC_mirror=&quot;https:\/\/mirrors.westlake.edu.cn\/bioconductor&quot;)\nBiocManager::install(&quot;GenomicRanges&quot;)<\/code><\/pre>\n<h2>\u5148\u8fd0\u884cBISCUT_preprocessing.py<\/h2>\n<pre><code class=\"language-python\"># python\u5185\u8bbe\u7f6e\u90e8\u5206\u5982\u4e0b\n# hg19\u548chg38\u90fd\u53ef\u4ee5\u7528SNP6_hg19_chromosome_locs_200605.txt\u4f5c\u4e3a\u4f4d\u7f6e\u6587\u4ef6\u6765\u5b9a\u4e49\u7aef\u7c92\u548c\u7740\u4e1d\u7c92\u7684\u4f4d\u7f6e\n# \u6ce8\u610fseg\u6587\u4ef6\u9700\u8981\u653e\u5728python\u811a\u672c\u4e0b\u4e00\u4e2a\u53ebdocs\u7684\u6587\u4ef6\u5939\u5185\uff0c\u5176\u6587\u4ef6\u540d\u683c\u5f0f\u4e3atumor_type+seg_file_suffix\uff0c\u5982\u4ee5\u4e0b\u6587\u4ef6\u540d\u4e3aLiverOld_LIHC_merge_old_552.seg\uff0c\u6ce8\u610ftumor_type\u4e0d\u80fd\u6709\u4e0b\u5212\u7ebf&quot;_&quot;\uff0c\u5426\u5219\u540e\u7eed\u4f1a\u62a5\u9519\namplitude_threshold = 0.2\nchromosome_coordinates = &#039;~\/software\/BISCUT-py3-main\/docs\/SNP6_hg19_chromosome_locs_200605.txt&#039;\ntumor_type = &#039;LiverOld&#039;\nseg_file_suffix = &#039;_LIHC_merge_old_552.seg&#039;\nn_proc = 8 \ndate_suffix = &#039;2025_01_09&#039;<\/code><\/pre>\n<h2>\u6784\u5efahg38\u7248\u672c\u7684\u57fa\u56e0\u540d-cytoband\u5bf9\u5e94\u6587\u4ef6<\/h2>\n<pre><code class=\"language-bash\"># \u4e0b\u8f7ducsc\u7684hg38\u7248\u672c\u7684cytoband\u6587\u4ef6\uff1a\n# https:\/\/hgdownload.soe.ucsc.edu\/goldenPath\/hg38\/database\/cytoBand.txt.gz\n# \u4e0b\u8f7ducsc\u7684hg38\u7248\u672c\u7684gtf\u6587\u4ef6\uff1a\n# http:\/\/hgdownload.soe.ucsc.edu\/goldenPath\/hg38\/bigZips\/genes\/hg38.refGene.gtf.gz<\/code><\/pre>\n<p>\u901a\u8fc7\u4ee5\u4e0bR\u811a\u672c\u8fdb\u884c\u5408\u5e76\uff1a<\/p>\n<pre><code class=\"language-R\">library(dplyr)\nlibrary(tidyr,lib.loc = &quot;\/usr\/lib\/R\/site-library&quot;)\nlibrary(stringr,lib.loc = &quot;\/usr\/lib\/R\/site-library&quot;)\nlibrary(IRanges)\n\nUCSC_cytoband_file &lt;- 
&quot;~\/database\/GRCh38\/ucsc\/UCSC_cytoBand.txt&quot;\nUCSC_refgene_gtf &lt;- &quot;~\/database\/GRCh38\/ucsc\/UCSC_hg38.refGene.gtf&quot;\n\nUCSC_cytoband &lt;- read.table(UCSC_cytoband_file, header = F, sep = &#039;\\t&#039;,quote = &quot;&quot;)\nUCSC_refgene &lt;- read.table(UCSC_refgene_gtf, header = F, sep = &#039;\\t&#039;,quote = &quot;&quot;,comment.char = &quot;#&quot;)\n\n# process refgene gtf\nUCSC_refgene_split &lt;- UCSC_refgene %&gt;%\n  separate(V9, into = paste0(&quot;V9_&quot;, 1:5), sep = &quot;;&quot;, fill = &quot;right&quot;, extra = &quot;drop&quot;) \n\nUCSC_refgene_split &lt;- subset(UCSC_refgene_split,V3==&quot;transcript&quot;)[,c(1,4,5,9,10)]\ncolnames(UCSC_refgene_split) &lt;- c(&quot;Chr&quot;,&quot;Start&quot;,&quot;End&quot;,&quot;Gene&quot;,&quot;RefSeqName&quot;)\n\nUCSC_refgene_split &lt;- UCSC_refgene_split %&gt;%\n  mutate(Gene = gsub(&#039;.*&quot;([^&quot;]+)&quot;.*&#039;, &#039;\\\\1&#039;, Gene))\n\nUCSC_refgene_split &lt;- UCSC_refgene_split %&gt;%\n  mutate(RefSeqName = gsub(&#039;.*&quot;([^&quot;]+)&quot;.*&#039;, &#039;\\\\1&#039;, RefSeqName))\n\nUCSC_refgene_split &lt;- subset(UCSC_refgene_split,Chr %in% c(&quot;chr1&quot;,&quot;chr2&quot;,&quot;chr3&quot;,&quot;chr4&quot;,&quot;chr5&quot;,&quot;chr6&quot;,&quot;chr7&quot;,&quot;chr8&quot;,&quot;chr9&quot;,&quot;chr10&quot;,&quot;chr11&quot;,&quot;chr12&quot;,&quot;chr13&quot;,&quot;chr14&quot;,&quot;chr15&quot;,&quot;chr16&quot;,&quot;chr17&quot;,&quot;chr18&quot;,&quot;chr19&quot;,&quot;chr20&quot;,&quot;chr21&quot;,&quot;chr22&quot;))\n\n# process cytoband\nUCSC_cytoband &lt;- subset(UCSC_cytoband,V1 %in% 
c(&quot;chr1&quot;,&quot;chr2&quot;,&quot;chr3&quot;,&quot;chr4&quot;,&quot;chr5&quot;,&quot;chr6&quot;,&quot;chr7&quot;,&quot;chr8&quot;,&quot;chr9&quot;,&quot;chr10&quot;,&quot;chr11&quot;,&quot;chr12&quot;,&quot;chr13&quot;,&quot;chr14&quot;,&quot;chr15&quot;,&quot;chr16&quot;,&quot;chr17&quot;,&quot;chr18&quot;,&quot;chr19&quot;,&quot;chr20&quot;,&quot;chr21&quot;,&quot;chr22&quot;))\nUCSC_cytoband &lt;- UCSC_cytoband[,1:4]\ncolnames(UCSC_cytoband) &lt;- c(&quot;Chr&quot;,&quot;Start&quot;,&quot;End&quot;,&quot;Cytoband&quot;)\n\n# Add the Cytoband annotation to UCSC_refgene_split\nUCSC_refgene_split$Cytoband &lt;- NA\n\n# Loop through each chromosome to match positions\nfor (chr in unique(UCSC_refgene_split$Chr)) {\n  # Filter rows for the current chromosome\n  refgene_chr &lt;- UCSC_refgene_split %&gt;% filter(Chr == chr)\n  cytoband_chr &lt;- UCSC_cytoband %&gt;% filter(Chr == chr)\n\n  # Create IRanges objects for the current chromosome\n  refgene_ranges &lt;- IRanges(start = refgene_chr$Start, end = refgene_chr$End)\n  cytoband_ranges &lt;- IRanges(start = cytoband_chr$Start, end = cytoband_chr$End)\n\n  # Find overlaps\n  overlaps &lt;- findOverlaps(refgene_ranges, cytoband_ranges)\n\n  # Map Cytoband values back to UCSC_refgene_split\n  UCSC_refgene_split$Cytoband[UCSC_refgene_split$Chr == chr][queryHits(overlaps)] &lt;- \n    cytoband_chr$Cytoband[subjectHits(overlaps)]\n}\n\nUCSC_refgene_split &lt;- UCSC_refgene_split[,c(&quot;Chr&quot;,&quot;Start&quot;,&quot;End&quot;,&quot;Cytoband&quot;,&quot;Gene&quot;,&quot;RefSeqName&quot;)]\nwrite.table(UCSC_refgene_split,&quot;\/home\/zhoukaiwen\/software\/BISCUT-py3-main\/docs\/UCSC_hg38_geneloc.txt&quot;,col.names = T,row.names = F,sep = &#039;\\t&#039;,quote = 
F)<\/code><\/pre>\n<ul>\n<li>\u6b64\u540e\u53ef\u6309\u9700\u5220\u9664\u6587\u4ef6\u4e2dChr\u5217\u7684\u201cchr\u201d\u524d\u7f00\uff0c\u5e76\u63d0\u53d6unique\u7684\u57fa\u56e0\u8f6c\u5f55\u672c<\/li>\n<\/ul>\n<h2>\u8bbe\u5b9a\u9ed8\u8ba4shell<\/h2>\n<pre><code>\u5728home\u76ee\u5f55\u9996\u5148\u6784\u5efatmp\u6587\u4ef6\u5939\nmkdir \/home\/zhoukaiwen\/tmp\n\u5728tmp\u6587\u4ef6\u5939\u4e2d\u6784\u5efa\u8f6f\u94fe\u63a5\uff0c\u5426\u5219\u540e\u7eedreticulate\u5305\u53ef\u80fd\u4f1a\u62a5\u9519\uff0c\u56e0\u4e3a\/usr\/bin\/sh\u9ed8\u8ba4\u6307\u5411dash\uff0c\u4e0ebash\u7684\u8bed\u6cd5\u4e0d\u540c\nln -s \/bin\/bash \/home\/zhoukaiwen\/tmp\/sh<\/code><\/pre>\n<h2>\u4fee\u6539R\u811a\u672c<\/h2>\n<pre><code class=\"language-R\"># \u4fee\u6539\u6700\u540e\u7684files = list.files(path = paste(resultsfolder,&#039;\/stats&#039;,sep=&#039;&#039;))\u4e3a\u4ee5\u4e0b\uff0c\u5426\u5219\u4f1a\u8bfb\u53d6result\u6587\u4ef6\u5939\u4e0b\u7684summary\u6587\u4ef6\u5939\u540d\u4ece\u800c\u62a5\u9519\nfiles = list.files(path = paste(resultsfolder,&#039;\/stats&#039;,sep=&#039;&#039;),pattern = &quot;\\\\.rds$&quot;)\n\n# \u5728BISCUT_peaks_finding.R \u6700\u540e\u7684source_python\u524d\u6dfb\u52a0\u4ee5\u4e0b\u4ee3\u7801\u6765\u7ed5\u5f00R\u5305reticulate\u4e2d\u7684Sys.which(&quot;sh&quot;)\u94fe\u63a5\u5230\u201c\/usr\/bin\/sh\u201d\nSys.setenv(PATH = paste(&quot;\/home\/zhoukaiwen\/tmp&quot;, Sys.getenv(&quot;PATH&quot;), sep = &quot;:&quot;))\nSys.which(&quot;sh&quot;)<\/code><\/pre>\n<h2>BISCUT\u7ed3\u679c\u6587\u4ef6<\/h2>\n<p>BISCUT 
\u7ed3\u679c\u6587\u4ef6\u6709\uff1a<\/p>\n<ol>\n<li>\u5728\u8bbe\u5b9a\u7684results\u6587\u4ef6\u5939\u4e0b\u7684all_BISCUT_results.txt\uff0c\u5982\u679c\u6709\u591a\u4e2a\u80bf\u7624\u7c7b\u578b\uff0c\u8fd9\u91cc\u662f\u6574\u5408\u4e86\u6240\u6709\u7c7b\u578b\u7684<\/li>\n<li>\u5728results\u6587\u4ef6\u5939\u4e0b\u6709\u4ee5\u8bbe\u5b9a\u7684\u80bf\u7624\u7c7b\u578b\u547d\u540d\u7684\u6587\u4ef6\u5939\uff0c\u5982\u4e0a\u9762\u8bbe\u7f6e\u7684LiverOld\uff0c\u8fd9\u4e2a\u6587\u4ef6\u5939\u4e0b\u53c8\u6709\u4e00\u4e2asummary\u6587\u4ef6\u5939\uff0c\u91cc\u9762\u7684LiverOld_BISCUT_results.txt\u548cLiverOld_BISCUT_results_cols_0.95.txt\u4e3a\u8fd9\u4e2a\u80bf\u7624\u7c7b\u578b\u5355\u72ec\u7684\u7ed3\u679c\u6587\u4ef6<\/li>\n<\/ol>\n<h2>\u901a\u8fc7BISCUT\u7684\u7ed3\u679c\u6587\u4ef6\u8ba1\u7b97peak\u548cgene-level\u7684Relative fitness<\/h2>\n<pre><code class=\"language-R\">gc()\nrm(list=ls())\n\n# Settings ####\nn_cores &lt;- 8 # numbers of CPU cores for parallelization over chromosome arms\nset.seed(123456789) #random seed\n\n# Settings for Liver Old \u2014\u2014 Self background\ntumor_type &lt;- &quot;LiverOld&quot;\nbiscut_peak_file &lt;- &quot;\/home\/zhoukaiwen\/algorithm_learn\/output\/biscut\/results_2025_01_09_0.95\/LiverOld\/summary\/LiverOld_BISCUT_results_cols_0.95.txt&quot;\nbiscut_gene_file &lt;- &quot;\/home\/zhoukaiwen\/algorithm_learn\/output\/biscut\/results_2025_01_09_0.95\/LiverOld\/summary\/LiverOld_BISCUT_results.txt&quot;\nbreakpoint_file_path &lt;- &quot;\/home\/zhoukaiwen\/algorithm_learn\/output\/biscut\/breakpoint_files_2025_01_09\/LiverOld&quot;\nabslocs_file &lt;- &#039;~\/software\/BISCUT-py3-main\/docs\/SNP6_hg19_chromosome_locs_200605.txt&#039;\noutput_path &lt;- &quot;\/home\/zhoukaiwen\/algorithm_learn\/output\/biscut\/results_2025_01_09_0.95&quot;\n\n# Settings for Liver Young \u2014\u2014 Self background\ntumor_type &lt;- &quot;LiverYoung&quot;\nbiscut_peak_file &lt;- 
&quot;\/home\/zhoukaiwen\/algorithm_learn\/output\/biscut\/results_2025_01_09_0.95\/LiverYoung\/summary\/LiverYoung_BISCUT_results_cols_0.95.txt&quot;\nbiscut_gene_file &lt;- &quot;\/home\/zhoukaiwen\/algorithm_learn\/output\/biscut\/results_2025_01_09_0.95\/LiverYoung\/summary\/LiverYoung_BISCUT_results.txt&quot;\nbreakpoint_file_path &lt;- &quot;\/home\/zhoukaiwen\/algorithm_learn\/output\/biscut\/breakpoint_files_2025_01_09\/LiverYoung&quot;\nabslocs_file &lt;- &#039;~\/software\/BISCUT-py3-main\/docs\/SNP6_hg19_chromosome_locs_200605.txt&#039;\noutput_path &lt;- &quot;\/home\/zhoukaiwen\/algorithm_learn\/output\/biscut\/results_2025_01_09_0.95&quot;\n\n# Settings for Liver Old \u2014\u2014 PANCAN background\ntumor_type &lt;- &quot;LiverOld&quot;\nbiscut_peak_file &lt;- &quot;\/home\/zhoukaiwen\/algorithm_learn\/output\/biscut\/results_2025_01_14_0.95\/LiverOld\/summary\/LiverOld_BISCUT_results_cols_0.95.txt&quot;\nbiscut_gene_file &lt;- &quot;\/home\/zhoukaiwen\/algorithm_learn\/output\/biscut\/results_2025_01_14_0.95\/LiverOld\/summary\/LiverOld_BISCUT_results.txt&quot;\nbreakpoint_file_path &lt;- &quot;\/home\/zhoukaiwen\/algorithm_learn\/output\/biscut\/breakpoint_files_2025_01_14\/LiverOld&quot;\nabslocs_file &lt;- &#039;~\/software\/BISCUT-py3-main\/docs\/SNP6_hg19_chromosome_locs_200605.txt&#039;\noutput_path &lt;- &quot;\/home\/zhoukaiwen\/algorithm_learn\/output\/biscut\/results_2025_01_14_0.95&quot;\n\n# Settings for Liver Young \u2014\u2014 PANCAN background\ntumor_type &lt;- &quot;LiverYoung&quot;\nbiscut_peak_file &lt;- &quot;\/home\/zhoukaiwen\/algorithm_learn\/output\/biscut\/results_2025_01_14_0.95\/LiverYoung\/summary\/LiverYoung_BISCUT_results_cols_0.95.txt&quot;\nbiscut_gene_file &lt;- &quot;\/home\/zhoukaiwen\/algorithm_learn\/output\/biscut\/results_2025_01_14_0.95\/LiverYoung\/summary\/LiverYoung_BISCUT_results.txt&quot;\nbreakpoint_file_path &lt;- 
&quot;\/home\/zhoukaiwen\/algorithm_learn\/output\/biscut\/breakpoint_files_2025_01_14\/LiverYoung&quot;\nabslocs_file &lt;- &#039;~\/software\/BISCUT-py3-main\/docs\/SNP6_hg19_chromosome_locs_200605.txt&#039;\noutput_path &lt;- &quot;\/home\/zhoukaiwen\/algorithm_learn\/output\/biscut\/results_2025_01_14_0.95&quot;\n\n# Step0: Load Library ####\nlibrary(ismev)\nlibrary(extRemes)\nlibrary(fitdistrplus)\nlibrary(truncdist)\nlibrary(segmented)\nlibrary(dplyr)\nlibrary(reticulate)\nlibrary(foreach)\nlibrary(doParallel)\nlibrary(tidyr,lib.loc = &quot;\/usr\/lib\/R\/site-library&quot;)\n\nregisterDoParallel(n_cores)\ngetDoParWorkers()\n\n# Step0: Load telomere\/centromere location files ####\nmessage(&quot;Step0: Loading telomere\/centromere location file...&quot;)\nabslocs &lt;- read.table(abslocs_file,sep=&#039;\\t&#039;,header=T)\nmessage(&quot;Step0: Load Done!&quot;)\n\n# Step1: Process BISCUT identified peak file and obtain peak regions ####\nmessage(&quot;Step1: Loading BISCUT identified peak files...&quot;)\nbiscut_peak &lt;- read.csv(biscut_peak_file,sep=&#039;\\t&#039;,header=T)\nbiscut_gene &lt;- read.csv(biscut_gene_file,sep=&#039;\\t&#039;,header=T)\n\nmessage(&quot;Step1: Extracting BISCUT identified peaks...&quot;)\nbiscut_peak &lt;- as.data.frame(t(biscut_peak))\ncolnames(biscut_peak) &lt;- biscut_peak[1,]\nbiscut_peak &lt;- biscut_peak[-1,]\nbiscut_peak_loc &lt;- biscut_peak$peak_location\nbiscut_peak_loc &lt;- data.frame(biscut_peak_loc) %&gt;%\n  separate(biscut_peak_loc, into = c(&quot;Chr&quot;, &quot;Positions&quot;), sep = &quot;:&quot;) %&gt;%\n  separate(Positions, into = c(&quot;Start&quot;, &quot;End&quot;), sep = &quot;-&quot;)\nbiscut_peak_loc$Chr &lt;- gsub(&quot;chr&quot;,&quot;&quot;,biscut_peak_loc$Chr)\nbiscut_peak_loc$Chr &lt;- as.integer(biscut_peak_loc$Chr)\nbiscut_peak_loc$direction &lt;- biscut_peak$direction\nbiscut_peak_loc$telcent &lt;- biscut_peak$`telomeric or centromeric`\n\nbiscut_peak_loc_abslocs &lt;- biscut_peak_loc 
%&gt;%\n  left_join(abslocs, by = c(&quot;Chr&quot; = &quot;chromosome_info&quot;))\nbiscut_peak_loc_abslocs$Chr &lt;- as.character(biscut_peak_loc_abslocs$Chr)\n\nbiscut_peak_loc_abslocs$ArmType &lt;- &quot;NA&quot;\nbiscut_peak_loc_abslocs$Start &lt;- as.integer(biscut_peak_loc_abslocs$Start)\nbiscut_peak_loc_abslocs$End &lt;- as.integer(biscut_peak_loc_abslocs$End)\n\nbiscut_peak_loc_abslocs$ArmType[which(biscut_peak_loc_abslocs$Start &gt; biscut_peak_loc_abslocs$p_start)] &lt;- &quot;p&quot;\nbiscut_peak_loc_abslocs$ArmType[which(biscut_peak_loc_abslocs$Start &gt; biscut_peak_loc_abslocs$q_start)] &lt;- &quot;q&quot;\n\nbiscut_peak_loc_abslocs$ArmLength &lt;- &quot;NA&quot;\nbiscut_peak_loc_abslocs$ArmLength[which(biscut_peak_loc_abslocs$ArmType==&quot;p&quot;)] &lt;- biscut_peak_loc_abslocs$p_end[which(biscut_peak_loc_abslocs$ArmType==&quot;p&quot;)]-biscut_peak_loc_abslocs$p_start[which(biscut_peak_loc_abslocs$ArmType==&quot;p&quot;)]\nbiscut_peak_loc_abslocs$ArmLength[which(biscut_peak_loc_abslocs$ArmType==&quot;q&quot;)] &lt;- biscut_peak_loc_abslocs$q_end[which(biscut_peak_loc_abslocs$ArmType==&quot;q&quot;)]-biscut_peak_loc_abslocs$q_start[which(biscut_peak_loc_abslocs$ArmType==&quot;q&quot;)]\nbiscut_peak_loc_abslocs$ArmLength &lt;- as.integer(biscut_peak_loc_abslocs$ArmLength)\n\nbiscut_peak_loc_abslocs$ArmStart&lt;- &quot;NA&quot;\nbiscut_peak_loc_abslocs$ArmStart[which(biscut_peak_loc_abslocs$ArmType==&quot;p&quot;)] &lt;- biscut_peak_loc_abslocs$p_start[which(biscut_peak_loc_abslocs$ArmType==&quot;p&quot;)]\nbiscut_peak_loc_abslocs$ArmStart[which(biscut_peak_loc_abslocs$ArmType==&quot;q&quot;)] &lt;- biscut_peak_loc_abslocs$q_start[which(biscut_peak_loc_abslocs$ArmType==&quot;q&quot;)]\nbiscut_peak_loc_abslocs$ArmStart &lt;- as.integer(biscut_peak_loc_abslocs$ArmStart)\n\nbiscut_peak_loc_abslocs$Start_percent &lt;- &quot;NA&quot;\nbiscut_peak_loc_abslocs$Start_percent &lt;- 
(biscut_peak_loc_abslocs$Start-biscut_peak_loc_abslocs$ArmStart)\/biscut_peak_loc_abslocs$ArmLength\nbiscut_peak_loc_abslocs$Start_percent[which(biscut_peak_loc_abslocs$Start_percent&lt;0)] &lt;- 0\nbiscut_peak_loc_abslocs$Start_percent[which(biscut_peak_loc_abslocs$Start_percent&gt;1)] &lt;- 1\n\nbiscut_peak_loc_abslocs$End_percent &lt;- &quot;NA&quot;\nbiscut_peak_loc_abslocs$End_percent &lt;- (biscut_peak_loc_abslocs$End-biscut_peak_loc_abslocs$ArmStart)\/biscut_peak_loc_abslocs$ArmLength\nbiscut_peak_loc_abslocs$End_percent[which(biscut_peak_loc_abslocs$End_percent&lt;0)] &lt;- 0\nbiscut_peak_loc_abslocs$End_percent[which(biscut_peak_loc_abslocs$End_percent&gt;1)] &lt;- 1\n\nbiscut_peak_loc_abslocs$Peak_Length &lt;- biscut_peak_loc_abslocs$End-biscut_peak_loc_abslocs$Start\nbiscut_peak_loc_abslocs$Peak_Length_percent &lt;- biscut_peak_loc_abslocs$Peak_Length\/biscut_peak_loc_abslocs$ArmLength\n\n# Reverse the direction peaks of cent-bounded on p and tel-bounded on q to make them start on 0\nbiscut_peak_loc_abslocs[(which(biscut_peak_loc_abslocs$telcent==&quot;cent&quot; &amp; biscut_peak_loc_abslocs$ArmType==&quot;p&quot;)),]$Start_percent &lt;- 1-(biscut_peak_loc_abslocs[(which(biscut_peak_loc_abslocs$telcent==&quot;cent&quot; &amp; biscut_peak_loc_abslocs$ArmType==&quot;p&quot;)),]$Start_percent)\nbiscut_peak_loc_abslocs[(which(biscut_peak_loc_abslocs$telcent==&quot;cent&quot; &amp; biscut_peak_loc_abslocs$ArmType==&quot;p&quot;)),]$End_percent &lt;- 1-(biscut_peak_loc_abslocs[(which(biscut_peak_loc_abslocs$telcent==&quot;cent&quot; &amp; biscut_peak_loc_abslocs$ArmType==&quot;p&quot;)),]$End_percent)\ntmp_peak_start &lt;- biscut_peak_loc_abslocs[(which(biscut_peak_loc_abslocs$telcent==&quot;cent&quot; &amp; biscut_peak_loc_abslocs$ArmType==&quot;p&quot;)),]$End_percent\ntmp_peak_end &lt;- biscut_peak_loc_abslocs[(which(biscut_peak_loc_abslocs$telcent==&quot;cent&quot; &amp; 
biscut_peak_loc_abslocs$ArmType==&quot;p&quot;)),]$Start_percent\nbiscut_peak_loc_abslocs[(which(biscut_peak_loc_abslocs$telcent==&quot;cent&quot; &amp; biscut_peak_loc_abslocs$ArmType==&quot;p&quot;)),]$Start_percent &lt;- tmp_peak_start\nbiscut_peak_loc_abslocs[(which(biscut_peak_loc_abslocs$telcent==&quot;cent&quot; &amp; biscut_peak_loc_abslocs$ArmType==&quot;p&quot;)),]$End_percent &lt;- tmp_peak_end\n\nbiscut_peak_loc_abslocs[(which(biscut_peak_loc_abslocs$telcent==&quot;tel&quot; &amp; biscut_peak_loc_abslocs$ArmType==&quot;q&quot;)),]$Start_percent &lt;- 1-(biscut_peak_loc_abslocs[(which(biscut_peak_loc_abslocs$telcent==&quot;tel&quot; &amp; biscut_peak_loc_abslocs$ArmType==&quot;q&quot;)),]$Start_percent)\nbiscut_peak_loc_abslocs[(which(biscut_peak_loc_abslocs$telcent==&quot;tel&quot; &amp; biscut_peak_loc_abslocs$ArmType==&quot;q&quot;)),]$End_percent &lt;- 1-(biscut_peak_loc_abslocs[(which(biscut_peak_loc_abslocs$telcent==&quot;tel&quot; &amp; biscut_peak_loc_abslocs$ArmType==&quot;q&quot;)),]$End_percent)\ntmp_peak_start &lt;- biscut_peak_loc_abslocs[(which(biscut_peak_loc_abslocs$telcent==&quot;tel&quot; &amp; biscut_peak_loc_abslocs$ArmType==&quot;q&quot;)),]$End_percent\ntmp_peak_end &lt;- biscut_peak_loc_abslocs[(which(biscut_peak_loc_abslocs$telcent==&quot;tel&quot; &amp; biscut_peak_loc_abslocs$ArmType==&quot;q&quot;)),]$Start_percent\nbiscut_peak_loc_abslocs[(which(biscut_peak_loc_abslocs$telcent==&quot;tel&quot; &amp; biscut_peak_loc_abslocs$ArmType==&quot;q&quot;)),]$Start_percent &lt;- tmp_peak_start\nbiscut_peak_loc_abslocs[(which(biscut_peak_loc_abslocs$telcent==&quot;tel&quot; &amp; biscut_peak_loc_abslocs$ArmType==&quot;q&quot;)),]$End_percent &lt;- tmp_peak_end\n\nbiscut_peak_loc_abslocs$Chr &lt;- as.integer(biscut_peak_loc_abslocs$Chr)\nbiscut_peak_loc_abslocs &lt;- biscut_peak_loc_abslocs[order(biscut_peak_loc_abslocs$Chr),]\n\nmessage(&quot;Step1: Extracting BISCUT identified genes on peaks...&quot;)\nbiscut_gene &lt;- 
biscut_gene[,c(&quot;Chr&quot;,&quot;Gene&quot;,&quot;Start&quot;,&quot;End&quot;,&quot;Peak.Start&quot;,&quot;Peak.End&quot;,&quot;arm&quot;,&quot;direction&quot;,&quot;telcent&quot;)]\nbiscut_gene_loc_abslocs &lt;- biscut_gene %&gt;%\n  left_join(abslocs, by = c(&quot;Chr&quot; = &quot;chromosome_info&quot;))\n\nbiscut_gene_loc_abslocs$ArmType &lt;- &quot;NA&quot;\nbiscut_gene_loc_abslocs$Start &lt;- as.integer(biscut_gene_loc_abslocs$Start)\nbiscut_gene_loc_abslocs$End &lt;- as.integer(biscut_gene_loc_abslocs$End)\n\nbiscut_gene_loc_abslocs$ArmType[which(biscut_gene_loc_abslocs$Start &gt; biscut_gene_loc_abslocs$p_start)] &lt;- &quot;p&quot;\nbiscut_gene_loc_abslocs$ArmType[which(biscut_gene_loc_abslocs$Start &gt; biscut_gene_loc_abslocs$q_start)] &lt;- &quot;q&quot;\n\nbiscut_gene_loc_abslocs$ArmLength &lt;- &quot;NA&quot;\nbiscut_gene_loc_abslocs$ArmLength[which(biscut_gene_loc_abslocs$ArmType==&quot;p&quot;)] &lt;- biscut_gene_loc_abslocs$p_end[which(biscut_gene_loc_abslocs$ArmType==&quot;p&quot;)]-biscut_gene_loc_abslocs$p_start[which(biscut_gene_loc_abslocs$ArmType==&quot;p&quot;)]\nbiscut_gene_loc_abslocs$ArmLength[which(biscut_gene_loc_abslocs$ArmType==&quot;q&quot;)] &lt;- biscut_gene_loc_abslocs$q_end[which(biscut_gene_loc_abslocs$ArmType==&quot;q&quot;)]-biscut_gene_loc_abslocs$q_start[which(biscut_gene_loc_abslocs$ArmType==&quot;q&quot;)]\nbiscut_gene_loc_abslocs$ArmLength &lt;- as.integer(biscut_gene_loc_abslocs$ArmLength)\n\nbiscut_gene_loc_abslocs$ArmStart&lt;- &quot;NA&quot;\nbiscut_gene_loc_abslocs$ArmStart[which(biscut_gene_loc_abslocs$ArmType==&quot;p&quot;)] &lt;- biscut_gene_loc_abslocs$p_start[which(biscut_gene_loc_abslocs$ArmType==&quot;p&quot;)]\nbiscut_gene_loc_abslocs$ArmStart[which(biscut_gene_loc_abslocs$ArmType==&quot;q&quot;)] &lt;- biscut_gene_loc_abslocs$q_start[which(biscut_gene_loc_abslocs$ArmType==&quot;q&quot;)]\nbiscut_gene_loc_abslocs$ArmStart &lt;- 
as.integer(biscut_gene_loc_abslocs$ArmStart)\n\nbiscut_gene_loc_abslocs$Start_percent &lt;- &quot;NA&quot;\nbiscut_gene_loc_abslocs$Start_percent &lt;- (biscut_gene_loc_abslocs$Start-biscut_gene_loc_abslocs$ArmStart)\/biscut_gene_loc_abslocs$ArmLength\nbiscut_gene_loc_abslocs$Start_percent[which(biscut_gene_loc_abslocs$Start_percent&lt;0)] &lt;- 0\nbiscut_gene_loc_abslocs$Start_percent[which(biscut_gene_loc_abslocs$Start_percent&gt;1)] &lt;- 1\n\nbiscut_gene_loc_abslocs$End_percent &lt;- &quot;NA&quot;\nbiscut_gene_loc_abslocs$End_percent &lt;- (biscut_gene_loc_abslocs$End-biscut_gene_loc_abslocs$ArmStart)\/biscut_gene_loc_abslocs$ArmLength\nbiscut_gene_loc_abslocs$End_percent[which(biscut_gene_loc_abslocs$End_percent&lt;0)] &lt;- 0\nbiscut_gene_loc_abslocs$End_percent[which(biscut_gene_loc_abslocs$End_percent&gt;1)] &lt;- 1\n\nbiscut_gene_loc_abslocs$Gene_Length &lt;- biscut_gene_loc_abslocs$End-biscut_gene_loc_abslocs$Start\nbiscut_gene_loc_abslocs$Gene_Length_percent &lt;- biscut_gene_loc_abslocs$Gene_Length\/biscut_gene_loc_abslocs$ArmLength\n\n# Reverse the direction genes of cent-bounded on p and tel-bounded on q to make them start on 0\nbiscut_gene_loc_abslocs[(which(biscut_gene_loc_abslocs$telcent==&quot;cent&quot; &amp; biscut_gene_loc_abslocs$ArmType==&quot;p&quot;)),]$Start_percent &lt;- 1-(biscut_gene_loc_abslocs[(which(biscut_gene_loc_abslocs$telcent==&quot;cent&quot; &amp; biscut_gene_loc_abslocs$ArmType==&quot;p&quot;)),]$Start_percent)\nbiscut_gene_loc_abslocs[(which(biscut_gene_loc_abslocs$telcent==&quot;cent&quot; &amp; biscut_gene_loc_abslocs$ArmType==&quot;p&quot;)),]$End_percent &lt;- 1-(biscut_gene_loc_abslocs[(which(biscut_gene_loc_abslocs$telcent==&quot;cent&quot; &amp; biscut_gene_loc_abslocs$ArmType==&quot;p&quot;)),]$End_percent)\ntmp_gene_start &lt;- biscut_gene_loc_abslocs[(which(biscut_gene_loc_abslocs$telcent==&quot;cent&quot; &amp; biscut_gene_loc_abslocs$ArmType==&quot;p&quot;)),]$End_percent\ntmp_gene_end 
&lt;-biscut_gene_loc_abslocs[(which(biscut_gene_loc_abslocs$telcent==&quot;cent&quot; &amp; biscut_gene_loc_abslocs$ArmType==&quot;p&quot;)),]$Start_percent\nbiscut_gene_loc_abslocs[(which(biscut_gene_loc_abslocs$telcent==&quot;cent&quot; &amp; biscut_gene_loc_abslocs$ArmType==&quot;p&quot;)),]$Start_percent &lt;- tmp_gene_start\nbiscut_gene_loc_abslocs[(which(biscut_gene_loc_abslocs$telcent==&quot;cent&quot; &amp; biscut_gene_loc_abslocs$ArmType==&quot;p&quot;)),]$End_percent &lt;- tmp_gene_end\n\nbiscut_gene_loc_abslocs[(which(biscut_gene_loc_abslocs$telcent==&quot;tel&quot; &amp; biscut_gene_loc_abslocs$ArmType==&quot;q&quot;)),]$Start_percent &lt;- 1-(biscut_gene_loc_abslocs[(which(biscut_gene_loc_abslocs$telcent==&quot;tel&quot; &amp; biscut_gene_loc_abslocs$ArmType==&quot;q&quot;)),]$Start_percent)\nbiscut_gene_loc_abslocs[(which(biscut_gene_loc_abslocs$telcent==&quot;tel&quot; &amp; biscut_gene_loc_abslocs$ArmType==&quot;q&quot;)),]$End_percent &lt;- 1-(biscut_gene_loc_abslocs[(which(biscut_gene_loc_abslocs$telcent==&quot;tel&quot; &amp; biscut_gene_loc_abslocs$ArmType==&quot;q&quot;)),]$End_percent)\ntmp_gene_start &lt;- biscut_gene_loc_abslocs[(which(biscut_gene_loc_abslocs$telcent==&quot;tel&quot; &amp; biscut_gene_loc_abslocs$ArmType==&quot;q&quot;)),]$End_percent\ntmp_gene_end &lt;-biscut_gene_loc_abslocs[(which(biscut_gene_loc_abslocs$telcent==&quot;tel&quot; &amp; biscut_gene_loc_abslocs$ArmType==&quot;q&quot;)),]$Start_percent\nbiscut_gene_loc_abslocs[(which(biscut_gene_loc_abslocs$telcent==&quot;tel&quot; &amp; biscut_gene_loc_abslocs$ArmType==&quot;q&quot;)),]$Start_percent &lt;- tmp_gene_start\nbiscut_gene_loc_abslocs[(which(biscut_gene_loc_abslocs$telcent==&quot;tel&quot; &amp; biscut_gene_loc_abslocs$ArmType==&quot;q&quot;)),]$End_percent &lt;- tmp_gene_end\n\nbiscut_gene_loc_abslocs$Chr &lt;- as.integer(biscut_gene_loc_abslocs$Chr)\nbiscut_gene_loc_abslocs &lt;- biscut_gene_loc_abslocs[order(biscut_gene_loc_abslocs$Chr),]\n\nmessage(&quot;Step1 
All Done!&quot;)\n\n# Step2: Process breakpoint file and count the number of true telomere\/centromere-bounded SCNA breakpoints between peaks ####\nmessage(&quot;Step2: Extracting breakpoint peak file...&quot;)\n\n# Extract the 4 rows necessary for later calculation\nbiscut_peak_loc_abslocs_unique &lt;- unique(biscut_peak_loc_abslocs[, c(&quot;Chr&quot;, &quot;direction&quot;, &quot;telcent&quot;, &quot;ArmType&quot;)])\n\n# Calculate the max number of peaks within one chr arm\ndup_count &lt;- biscut_peak_loc_abslocs %&gt;%\n  group_by(Chr, direction, telcent, ArmType) %&gt;%\n  summarise(count = n(), .groups = &quot;drop&quot;) %&gt;%\n  as.data.frame(.)\n\n# Create breakpoint_filenames to store paths of telomere\/centromere-bounded SCNA breakpoint files\nbreakpoint_filenames &lt;- unique(apply(biscut_peak_loc_abslocs_unique, 1, function(row) {\n  paste(breakpoint_file_path,&quot;\/&quot;,tumor_type,&quot;_&quot;,row[&quot;Chr&quot;], row[&quot;ArmType&quot;], &quot;_&quot;, row[&quot;direction&quot;], &quot;_&quot;,row[&quot;telcent&quot;], &quot;.txt&quot;,sep = &quot;&quot;)\n}))\n\n# Change the name for chr 13,14,15,21,22 to match the names of breakpoint filenames\nbreakpoint_filenames &lt;- gsub(&quot;_13p&quot;,&quot;_13&quot;,breakpoint_filenames)\nbreakpoint_filenames &lt;- gsub(&quot;_13q&quot;,&quot;_13&quot;,breakpoint_filenames)\nbreakpoint_filenames &lt;- gsub(&quot;_14p&quot;,&quot;_14&quot;,breakpoint_filenames)\nbreakpoint_filenames &lt;- gsub(&quot;_14q&quot;,&quot;_14&quot;,breakpoint_filenames)\nbreakpoint_filenames &lt;- gsub(&quot;_15p&quot;,&quot;_15&quot;,breakpoint_filenames)\nbreakpoint_filenames &lt;- gsub(&quot;_15q&quot;,&quot;_15&quot;,breakpoint_filenames)\nbreakpoint_filenames &lt;- gsub(&quot;_21p&quot;,&quot;_21&quot;,breakpoint_filenames)\nbreakpoint_filenames &lt;- gsub(&quot;_21q&quot;,&quot;_21&quot;,breakpoint_filenames)\nbreakpoint_filenames &lt;- 
gsub(&quot;_22p&quot;,&quot;_22&quot;,breakpoint_filenames)\nbreakpoint_filenames &lt;- gsub(&quot;_22q&quot;,&quot;_22&quot;,breakpoint_filenames)\nbreakpoint_filenames &lt;- gsub(&quot; &quot;,&quot;&quot;,breakpoint_filenames)\n\n# Calculate the max number of segments(between BISCUT peaks) in one arm\nn = max(dup_count$count)+1\n\n# Create column names for Expected and True breakpoint count within one segment\ncol_names &lt;- paste0(&quot;E_&quot;, 0:(n-1), &quot;_&quot;, 1:n)\nnew_rows_Expected &lt;- as.data.frame(matrix(nrow = nrow(biscut_peak_loc_abslocs_unique), ncol = length(col_names)))\ncolnames(new_rows_Expected) &lt;- col_names\nbiscut_peak_loc_abslocs_unique &lt;- cbind(biscut_peak_loc_abslocs_unique, new_rows_Expected)\n\ncol_names &lt;- paste0(&quot;T_&quot;, 0:(n-1), &quot;_&quot;, 1:n)\nnew_rows_True &lt;- as.data.frame(matrix(nrow = nrow(biscut_peak_loc_abslocs_unique), ncol = length(col_names)))\ncolnames(new_rows_True) &lt;- col_names\nbiscut_peak_loc_abslocs_unique &lt;- cbind(biscut_peak_loc_abslocs_unique, new_rows_True)\n\n# Set the max number of peak in one chr arm\nbiscut_peak_n &lt;- max(dup_count$count)\n\n# Set names for peak start\ncol_names &lt;- paste0(&quot;biscut_peak_start_&quot;, 1:biscut_peak_n)\nnew_rows_biscut_ps &lt;- as.data.frame(matrix(nrow = nrow(biscut_peak_loc_abslocs_unique), ncol = length(col_names)))\ncolnames(new_rows_biscut_ps) &lt;- col_names\nbiscut_peak_loc_abslocs_unique &lt;- cbind(biscut_peak_loc_abslocs_unique, new_rows_biscut_ps)\n\n# Set names for peak end\ncol_names &lt;- paste0(&quot;biscut_peak_end_&quot;, 1:biscut_peak_n)\nnew_rows_biscut_pe &lt;- as.data.frame(matrix(nrow = nrow(biscut_peak_loc_abslocs_unique), ncol = length(col_names)))\ncolnames(new_rows_biscut_pe) &lt;- col_names\nbiscut_peak_loc_abslocs_unique &lt;- cbind(biscut_peak_loc_abslocs_unique, new_rows_biscut_pe)\n\n# Set names for peak length as percentage\ncol_names &lt;- paste0(&quot;biscut_peak_length_percent_&quot;, 
1:biscut_peak_n)\nnew_rows_biscut_plp &lt;- as.data.frame(matrix(nrow = nrow(biscut_peak_loc_abslocs_unique), ncol = length(col_names)))\ncolnames(new_rows_biscut_plp) &lt;- col_names\nbiscut_peak_loc_abslocs_unique &lt;- cbind(biscut_peak_loc_abslocs_unique, new_rows_biscut_plp)\n\n# Read breakpoint_filenames\nmessage(&quot;Matching breakpoint files with BISCUT peak files...&quot;)\ncount_n = 1\nfor (filenames in breakpoint_filenames){\n  tmp_file &lt;-  read.csv(filenames,sep=&#039;\\t&#039;,header=T)\n  tmp_file &lt;- subset(tmp_file,percent&gt;0) # Extract samples with SCNA length in the chr arm\n  tmp_count_df &lt;- as.data.frame(table(tmp_file$end)) # Extract SCNA breakpoint end as percentage and count its number in the chr arm\n  tmp_count_df$Var1 &lt;- as.numeric(as.character(tmp_count_df$Var1)) # Var1 correspond to the breakpoint end location in the chr arm, shown as percentage\n  tmp_count_df$Freq &lt;- as.numeric(as.character(tmp_count_df$Freq)) # Freq correspond to the count of the same breakpoints in the chr arm, shown as integer\n  message(&quot;Processing &quot;,biscut_peak_loc_abslocs_unique[count_n,]$Chr,biscut_peak_loc_abslocs_unique[count_n,]$ArmType,&quot; &quot;, biscut_peak_loc_abslocs_unique[count_n,]$telcent, &quot; &quot;,biscut_peak_loc_abslocs_unique[count_n,]$direction)\n  # Extract BISCUT indentified peak(peaks that located at the chr arm in breakpoint_filenames) locations(as percentage)\n  tmp_biscut_peak_start &lt;- subset(biscut_peak_loc_abslocs, Chr==biscut_peak_loc_abslocs_unique[count_n,]$Chr &amp; direction==biscut_peak_loc_abslocs_unique[count_n,]$direction &amp; telcent==biscut_peak_loc_abslocs_unique[count_n,]$telcent &amp; ArmType==biscut_peak_loc_abslocs_unique[count_n,]$ArmType)$Start_percent\n  tmp_biscut_peak_end &lt;- subset(biscut_peak_loc_abslocs, Chr==biscut_peak_loc_abslocs_unique[count_n,]$Chr &amp; direction==biscut_peak_loc_abslocs_unique[count_n,]$direction &amp; 
telcent==biscut_peak_loc_abslocs_unique[count_n,]$telcent &amp; ArmType==biscut_peak_loc_abslocs_unique[count_n,]$ArmType)$End_percent\n  tmp_biscut_peak_length_percent &lt;- subset(biscut_peak_loc_abslocs, Chr==biscut_peak_loc_abslocs_unique[count_n,]$Chr &amp; direction==biscut_peak_loc_abslocs_unique[count_n,]$direction &amp; telcent==biscut_peak_loc_abslocs_unique[count_n,]$telcent &amp; ArmType==biscut_peak_loc_abslocs_unique[count_n,]$ArmType)$Peak_Length_percent\n  message(&quot;Filtering telomere\/centromere-bounded SCNA breakpoints before BISCUT peak1 start: &quot;,min(tmp_biscut_peak_start))\n  tmp_count_df_subset1 &lt;- subset(tmp_count_df, Var1 &lt; min(tmp_biscut_peak_start)) # SCNA breakpoint end &lt; BISCUT peak start are identified as E_0_1\n  tmp_count_df_subset2 &lt;- subset(tmp_count_df, Var1 &lt; 1 &amp; Var1 &gt; max(tmp_biscut_peak_end)) # Extract the SCNA breakpoint end in the last segment(located before arm end(1) and after the last BISCUT end(max(tmp_biscut_peak_end))) \n  tmp_E_0_1 &lt;- sum(tmp_count_df_subset1$Freq) # Calculate the total count of Expected breakpoints in the first segment, which equals True total count of breakpoints\n  tmp_T_0_1 &lt;- sum(tmp_count_df_subset1$Freq) # Calculate the total count of True breakpoints in the first segment\n  tmp_T_1_2 &lt;- sum(tmp_count_df_subset2$Freq) # Calculate the total count of True breakpoints in the second segment, will change later if the number of BISCUT peak is larger than 1\n  biscut_peak_loc_abslocs_unique[count_n,]$E_0_1 &lt;- tmp_E_0_1\n  biscut_peak_loc_abslocs_unique[count_n,]$T_0_1 &lt;- tmp_T_0_1\n  if (length(tmp_biscut_peak_start)==1){\n    message(&quot;There&#039;s only 1 peak on &quot;, biscut_peak_loc_abslocs_unique[count_n,]$Chr,biscut_peak_loc_abslocs_unique[count_n,]$ArmType,&quot; &quot;, biscut_peak_loc_abslocs_unique[count_n,]$telcent, &quot; &quot;,biscut_peak_loc_abslocs_unique[count_n,]$direction)\n    message(&quot;Filtering telomere\/centromere-bounded SCNA 
breakpoints before arm end&quot;)\n    biscut_peak_loc_abslocs_unique[count_n,]$biscut_peak_start_1 &lt;- tmp_biscut_peak_start\n    biscut_peak_loc_abslocs_unique[count_n,]$biscut_peak_end_1 &lt;- tmp_biscut_peak_end\n    biscut_peak_loc_abslocs_unique[count_n,]$biscut_peak_length_percent_1 &lt;- tmp_biscut_peak_length_percent\n    tmp_count_df_subset2 &lt;- subset(tmp_count_df, Var1 &lt; 1 &amp; Var1 &gt; tmp_biscut_peak_end) # SCNA breakpoint ends located after the BISCUT peak end and before the arm end(1) are counted as T_1_2\n    biscut_peak_loc_abslocs_unique[count_n,]$T_1_2 &lt;- tmp_T_1_2\n  }else if(length(tmp_biscut_peak_start)&gt;1){\n    message(&quot;There&#039;re &quot;,length(tmp_biscut_peak_start),&quot; peaks on &quot;, biscut_peak_loc_abslocs_unique[count_n,]$Chr,biscut_peak_loc_abslocs_unique[count_n,]$ArmType,&quot; &quot;, biscut_peak_loc_abslocs_unique[count_n,]$telcent, &quot; &quot;,biscut_peak_loc_abslocs_unique[count_n,]$direction)\n    for(peak_n in 1:length(tmp_biscut_peak_start)){\n      message(&quot;Processing peak &quot;, peak_n, &quot;...&quot;)\n      bps_col_name &lt;- paste0(&quot;biscut_peak_start_&quot;, peak_n)\n      bpe_col_name &lt;- paste0(&quot;biscut_peak_end_&quot;, peak_n)\n      bplp_col_name &lt;- paste0(&quot;biscut_peak_length_percent_&quot;, peak_n)\n      true_col_name &lt;- paste0(&quot;T_&quot;, peak_n-1, &quot;_&quot;, peak_n)\n      biscut_peak_loc_abslocs_unique[count_n,bps_col_name] &lt;- tmp_biscut_peak_start[peak_n]\n      biscut_peak_loc_abslocs_unique[count_n,bpe_col_name] &lt;- tmp_biscut_peak_end[peak_n]\n      biscut_peak_loc_abslocs_unique[count_n,bplp_col_name] &lt;- tmp_biscut_peak_length_percent[peak_n]\n      if (1 &lt; peak_n &amp; peak_n &lt; length(tmp_biscut_peak_start)){\n        tmp_count_df_subset &lt;- subset(tmp_count_df, Var1 &lt; tmp_biscut_peak_start[peak_n] &amp; Var1 &gt; tmp_biscut_peak_end[peak_n-1])\n        biscut_peak_loc_abslocs_unique[count_n,true_col_name] &lt;- sum(tmp_count_df_subset$Freq)\n      }else 
if(peak_n==length(tmp_biscut_peak_start)){\n        tmp_count_df_subset1 &lt;- subset(tmp_count_df, Var1 &lt; tmp_biscut_peak_start[peak_n] &amp; Var1 &gt; tmp_biscut_peak_end[peak_n-1])\n        tmp_count_df_subset2 &lt;- subset(tmp_count_df, Var1 &lt; 1 &amp; Var1 &gt; tmp_biscut_peak_end[peak_n])\n        true_col_name2 &lt;- paste0(&quot;T_&quot;, peak_n, &quot;_&quot;, peak_n+1)\n        biscut_peak_loc_abslocs_unique[count_n,true_col_name] &lt;- sum(tmp_count_df_subset1$Freq)\n        biscut_peak_loc_abslocs_unique[count_n,true_col_name2] &lt;- sum(tmp_count_df_subset2$Freq)\n      }\n    }\n  }\n  count_n = count_n+1\n}\nmessage(&quot;Matching BISCUT peak files with BISCUT gene files...&quot;)\ndup_count$MultiPeak &lt;- &quot;TRUE&quot;\ndup_count$MultiPeak[which(dup_count$count == 1)] &lt;- &quot;FALSE&quot;\n\ndup_peak &lt;- unique(biscut_peak_loc_abslocs[, c(&quot;Chr&quot;, &quot;direction&quot;, &quot;telcent&quot;, &quot;ArmType&quot;,&quot;Start&quot;,&quot;End&quot;,&quot;Start_percent&quot;,&quot;End_percent&quot;)])\ndup_peak &lt;- dup_peak[order(dup_peak$Start),]\ndup_peak &lt;- dup_peak %&gt;%\n  group_by(Chr, direction, telcent, ArmType) %&gt;%\n  mutate(\n    PeakNum = paste0(&quot;Peak&quot;, row_number()),        # Assign peak numbers\n    PeakNum = ifelse(row_number() == n(),         # Check if it&#039;s the last row in the group\n                     paste0(PeakNum, &quot;(Last)&quot;), \n                     PeakNum),\n    NextPeakStart = ifelse(grepl(&quot;\\\\(Last\\\\)&quot;, PeakNum), 1, lead(Start_percent)),  # Use lead(Start_percent) for next peak, 1 if Last\n    PriorPeakEnd = ifelse(row_number() == 1, 0, lag(End_percent))\n  ) %&gt;%\n  ungroup()\n\ncolnames(dup_peak)[which(colnames(dup_peak)==&quot;Start&quot;)] &lt;- &quot;Peak.Start&quot;\ncolnames(dup_peak)[which(colnames(dup_peak)==&quot;End&quot;)] &lt;- &quot;Peak.End&quot;\ncolnames(dup_peak)[which(colnames(dup_peak)==&quot;Start_percent&quot;)] &lt;- 
&quot;Peak.Start_percent&quot;\ncolnames(dup_peak)[which(colnames(dup_peak)==&quot;End_percent&quot;)] &lt;- &quot;Peak.End_percent&quot;\n\nbiscut_gene_loc_abslocs_peak &lt;- biscut_gene_loc_abslocs %&gt;%\n  left_join(dup_peak, by = c(&quot;Chr&quot; = &quot;Chr&quot;,&quot;direction&quot; = &quot;direction&quot;, &quot;telcent&quot; = &quot;telcent&quot;, &quot;ArmType&quot; = &quot;ArmType&quot;,&quot;Peak.Start&quot;=&quot;Peak.Start&quot;,&quot;Peak.End&quot;=&quot;Peak.End&quot;))\n\nmessage(&quot;Matching breakpoint files with BISCUT gene files&quot;)\nbiscut_gene_loc_abslocs_peak$E_0_1 &lt;- NA\nbiscut_gene_loc_abslocs_peak$E_1_2 &lt;- NA\nbiscut_gene_loc_abslocs_peak$T_0_1 &lt;- NA\nbiscut_gene_loc_abslocs_peak$T_1_2 &lt;- NA\n\ncount_n=1\nfor (filenames in breakpoint_filenames){\n  tmp_file &lt;-  read.csv(filenames,sep=&#039;\\t&#039;,header=T)\n  tmp_file &lt;- subset(tmp_file,percent&gt;0) # Extract samples with SCNA length in the chr arm\n  tmp_count_df &lt;- as.data.frame(table(tmp_file$end)) # Extract SCNA breakpoint end as percentage and count its number in the chr arm\n  tmp_count_df$Var1 &lt;- as.numeric(as.character(tmp_count_df$Var1)) # Var1 corresponds to the breakpoint end location in the chr arm, shown as percentage\n  tmp_count_df$Freq &lt;- as.numeric(as.character(tmp_count_df$Freq)) # Freq corresponds to the count of the same breakpoints in the chr arm, shown as integer\n  tmp_Chr &lt;- biscut_peak_loc_abslocs_unique[count_n,]$Chr\n  tmp_ArmType &lt;- biscut_peak_loc_abslocs_unique[count_n,]$ArmType\n  tmp_telcent &lt;- biscut_peak_loc_abslocs_unique[count_n,]$telcent\n  tmp_direction &lt;- biscut_peak_loc_abslocs_unique[count_n,]$direction\n  message(&quot;Processing &quot;,biscut_peak_loc_abslocs_unique[count_n,]$Chr,biscut_peak_loc_abslocs_unique[count_n,]$ArmType,&quot; &quot;, biscut_peak_loc_abslocs_unique[count_n,]$telcent, &quot; &quot;,biscut_peak_loc_abslocs_unique[count_n,]$direction)\n  # Matching BISCUT identified genes\n  
for (gn in 1:nrow(biscut_gene_loc_abslocs_peak)){\n    if(biscut_gene_loc_abslocs_peak[gn,]$Chr==tmp_Chr &amp; biscut_gene_loc_abslocs_peak[gn,]$ArmType==tmp_ArmType &amp; biscut_gene_loc_abslocs_peak[gn,]$telcent==tmp_telcent &amp; biscut_gene_loc_abslocs_peak[gn,]$direction==tmp_direction){\n      message(filenames)\n      tmp_biscut_gene_start &lt;- biscut_gene_loc_abslocs_peak[gn,]$Start_percent\n      tmp_biscut_gene_end &lt;- biscut_gene_loc_abslocs_peak[gn,]$End_percent\n      tmp_biscut_gene_length_percent &lt;- biscut_gene_loc_abslocs_peak[gn,]$Gene_Length_percent\n      tmp_prior_peak_end_percent &lt;- biscut_gene_loc_abslocs_peak[gn,]$PriorPeakEnd\n      tmp_next_peak_start_percent &lt;- biscut_gene_loc_abslocs_peak[gn,]$NextPeakStart\n      tmp_gene_name &lt;- biscut_gene_loc_abslocs_peak[gn,]$Gene\n      message(&quot;Filtering telomere\/centromere-bounded SCNA breakpoints before &quot;, tmp_gene_name,&quot; start: &quot;,tmp_biscut_gene_start)\n      tmp_count_df_subset1 &lt;- subset(tmp_count_df, Var1 &lt; tmp_biscut_gene_start &amp; Var1 &gt; tmp_prior_peak_end_percent) # SCNA breakpoint end &lt; BISCUT gene start are identified as E_0_1\n      tmp_count_df_subset2 &lt;- subset(tmp_count_df, Var1 &lt; tmp_next_peak_start_percent &amp; Var1 &gt; tmp_biscut_gene_end) # Extract the SCNA breakpoint end in the segment after the BISCUT gene end\n      tmp_E_0_1 &lt;- sum(tmp_count_df_subset1$Freq) # Calculate the total count of Expected breakpoints in the first segment, which equals True total count of breakpoints\n      tmp_T_0_1 &lt;- sum(tmp_count_df_subset1$Freq) # Calculate the total count of True breakpoints in the first segment\n      tmp_T_1_2 &lt;- sum(tmp_count_df_subset2$Freq) # Calculate the total count of True breakpoints in the second segment\n      biscut_gene_loc_abslocs_peak[gn,]$E_0_1 &lt;- tmp_E_0_1\n      biscut_gene_loc_abslocs_peak[gn,]$T_0_1 &lt;- tmp_T_0_1\n      biscut_gene_loc_abslocs_peak[gn,]$T_1_2 &lt;- tmp_T_1_2\n      
message(&quot;Gene &quot;,gn,&quot; &quot;,tmp_gene_name,&quot;: &quot;,tmp_E_0_1,&quot;|&quot;,tmp_T_0_1,&quot;|&quot;,tmp_T_1_2)\n    }\n  }\n  count_n = count_n+1\n}\n\nmessage(&quot;Step2 All Done!&quot;)\n\n# Step3: Fit breakpoints to beta distribution to obtain alpha and beta value ####\nbiscut_peak_loc_abslocs_unique$alpha &lt;- NA\nbiscut_peak_loc_abslocs_unique$beta &lt;- NA\n\ncount_n = 1\nfor (filenames in breakpoint_filenames){\n  tmp_file &lt;-  read.csv(filenames,sep=&#039;\\t&#039;,header=T)\n  betafit &lt;- fitdist(tmp_file$percent,&#039;beta&#039;)\n  alpha = summary(betafit)$estimate[1]\n  beta = summary(betafit)$estimate[2]\n  biscut_peak_loc_abslocs_unique[count_n,&quot;alpha&quot;] &lt;- alpha\n  biscut_peak_loc_abslocs_unique[count_n,&quot;beta&quot;] &lt;- beta\n  count_n = count_n+1\n}\n\narm_alpha_beta &lt;- biscut_peak_loc_abslocs_unique[,c(&quot;Chr&quot;,&quot;direction&quot;,&quot;telcent&quot;,&quot;ArmType&quot;,&quot;alpha&quot;,&quot;beta&quot;)]\n\nbiscut_gene_loc_abslocs_peak &lt;- biscut_gene_loc_abslocs_peak %&gt;%\n  left_join(arm_alpha_beta, by = c(&quot;Chr&quot; = &quot;Chr&quot;,&quot;direction&quot; = &quot;direction&quot;, &quot;telcent&quot; = &quot;telcent&quot;, &quot;ArmType&quot; = &quot;ArmType&quot;))\n\nmessage(&quot;Step3 All Done!&quot;)\n\n# Step4: Use BISCUT peak to estimate expected count of telomere\/centromere-bounded SCNA breakpoints between peaks ####\nmessage(&quot;Calculating peak-level expected count of telomere\/centromere-bounded SCNA breakpoints&quot;)\nfor(n in 1:nrow(biscut_peak_loc_abslocs_unique)){\n  for(peak_n in 1:max(dup_count$count)){\n    expected_n_1_col_name &lt;- paste0(&quot;E_&quot;, peak_n-1, &quot;_&quot;, peak_n)\n    expected_n_col_name &lt;- paste0(&quot;E_&quot;, peak_n, &quot;_&quot;, peak_n+1)\n\n    bps_n_col_name &lt;- paste0(&quot;biscut_peak_start_&quot;, peak_n)\n    bpe_n_col_name &lt;- paste0(&quot;biscut_peak_end_&quot;, peak_n)\n    bplp_n_col_name &lt;- 
paste0(&quot;biscut_peak_length_percent_&quot;, peak_n)\n\n    bps_n1_col_name &lt;- paste0(&quot;biscut_peak_start_&quot;, peak_n+1)\n    bpe_n1_col_name &lt;- paste0(&quot;biscut_peak_end_&quot;, peak_n+1)\n    bplp_n1_col_name &lt;- paste0(&quot;biscut_peak_length_percent_&quot;, peak_n+1)\n\n    bps_n_1_col_name &lt;- paste0(&quot;biscut_peak_start_&quot;, peak_n-1)\n    bpe_n_1_col_name &lt;- paste0(&quot;biscut_peak_end_&quot;, peak_n-1)\n    bplp_n_1_col_name &lt;- paste0(&quot;biscut_peak_length_percent_&quot;, peak_n-1)\n\n    if (peak_n == 1){\n      if(bps_n1_col_name %in% colnames(biscut_peak_loc_abslocs_unique)){\n        if(is.na(biscut_peak_loc_abslocs_unique[n,bps_n1_col_name])==TRUE){\n          biscut_peak_loc_abslocs_unique[n,expected_n_col_name] &lt;- (pbeta(1,biscut_peak_loc_abslocs_unique[n,&quot;alpha&quot;],biscut_peak_loc_abslocs_unique[n,&quot;beta&quot;])-pbeta(biscut_peak_loc_abslocs_unique[n,bpe_n_col_name],biscut_peak_loc_abslocs_unique[n,&quot;alpha&quot;],biscut_peak_loc_abslocs_unique[n,&quot;beta&quot;]))*biscut_peak_loc_abslocs_unique[n,expected_n_1_col_name]\/(pbeta(biscut_peak_loc_abslocs_unique[n,bps_n_col_name],biscut_peak_loc_abslocs_unique[n,&quot;alpha&quot;],biscut_peak_loc_abslocs_unique[n,&quot;beta&quot;]))\n        }else if(is.na(biscut_peak_loc_abslocs_unique[n,bps_n1_col_name])==FALSE){\n          biscut_peak_loc_abslocs_unique[n,expected_n_col_name] &lt;- (pbeta(biscut_peak_loc_abslocs_unique[n,bps_n1_col_name],biscut_peak_loc_abslocs_unique[n,&quot;alpha&quot;],biscut_peak_loc_abslocs_unique[n,&quot;beta&quot;])-pbeta(biscut_peak_loc_abslocs_unique[n,bpe_n_col_name],biscut_peak_loc_abslocs_unique[n,&quot;alpha&quot;],biscut_peak_loc_abslocs_unique[n,&quot;beta&quot;]))*biscut_peak_loc_abslocs_unique[n,expected_n_1_col_name]\/(pbeta(biscut_peak_loc_abslocs_unique[n,bps_n_col_name],biscut_peak_loc_abslocs_unique[n,&quot;alpha&quot;],biscut_peak_loc_abslocs_unique[n,&quot;beta&quot;]))\n        }\n      }else 
if(((bps_n1_col_name %in% colnames(biscut_peak_loc_abslocs_unique)==FALSE))){\n        biscut_peak_loc_abslocs_unique[n,expected_n_col_name] &lt;- (pbeta(1,biscut_peak_loc_abslocs_unique[n,&quot;alpha&quot;],biscut_peak_loc_abslocs_unique[n,&quot;beta&quot;])-pbeta(biscut_peak_loc_abslocs_unique[n,bpe_n_col_name],biscut_peak_loc_abslocs_unique[n,&quot;alpha&quot;],biscut_peak_loc_abslocs_unique[n,&quot;beta&quot;]))*biscut_peak_loc_abslocs_unique[n,expected_n_1_col_name]\/(pbeta(biscut_peak_loc_abslocs_unique[n,bps_n_col_name],biscut_peak_loc_abslocs_unique[n,&quot;alpha&quot;],biscut_peak_loc_abslocs_unique[n,&quot;beta&quot;]))\n      }\n    }else if(1&lt;peak_n &amp; peak_n&lt;max(dup_count$count)){\n      if(is.na(biscut_peak_loc_abslocs_unique[n,bps_n_col_name])==FALSE &amp; is.na(biscut_peak_loc_abslocs_unique[n,bps_n1_col_name])==FALSE){\n        biscut_peak_loc_abslocs_unique[n,expected_n_col_name] &lt;- (pbeta(biscut_peak_loc_abslocs_unique[n,bps_n1_col_name],biscut_peak_loc_abslocs_unique[n,&quot;alpha&quot;],biscut_peak_loc_abslocs_unique[n,&quot;beta&quot;])-pbeta(biscut_peak_loc_abslocs_unique[n,bpe_n_col_name],biscut_peak_loc_abslocs_unique[n,&quot;alpha&quot;],biscut_peak_loc_abslocs_unique[n,&quot;beta&quot;]))*biscut_peak_loc_abslocs_unique[n,expected_n_1_col_name]\/(pbeta(biscut_peak_loc_abslocs_unique[n,bps_n_col_name],biscut_peak_loc_abslocs_unique[n,&quot;alpha&quot;],biscut_peak_loc_abslocs_unique[n,&quot;beta&quot;])-pbeta(biscut_peak_loc_abslocs_unique[n,bpe_n_1_col_name],biscut_peak_loc_abslocs_unique[n,&quot;alpha&quot;],biscut_peak_loc_abslocs_unique[n,&quot;beta&quot;])) \n      }else if(is.na(biscut_peak_loc_abslocs_unique[n,bps_n_col_name])==FALSE &amp; is.na(biscut_peak_loc_abslocs_unique[n,bps_n1_col_name])==TRUE){\n        biscut_peak_loc_abslocs_unique[n,expected_n_col_name] &lt;- 
(pbeta(1,biscut_peak_loc_abslocs_unique[n,&quot;alpha&quot;],biscut_peak_loc_abslocs_unique[n,&quot;beta&quot;])-pbeta(biscut_peak_loc_abslocs_unique[n,bpe_n_col_name],biscut_peak_loc_abslocs_unique[n,&quot;alpha&quot;],biscut_peak_loc_abslocs_unique[n,&quot;beta&quot;]))*biscut_peak_loc_abslocs_unique[n,expected_n_1_col_name]\/(pbeta(biscut_peak_loc_abslocs_unique[n,bps_n_col_name],biscut_peak_loc_abslocs_unique[n,&quot;alpha&quot;],biscut_peak_loc_abslocs_unique[n,&quot;beta&quot;])-pbeta(biscut_peak_loc_abslocs_unique[n,bpe_n_1_col_name],biscut_peak_loc_abslocs_unique[n,&quot;alpha&quot;],biscut_peak_loc_abslocs_unique[n,&quot;beta&quot;]))\n      }\n    }else if(peak_n==max(dup_count$count)){\n      if(is.na(biscut_peak_loc_abslocs_unique[n,bps_n_col_name])==FALSE){\n        biscut_peak_loc_abslocs_unique[n,expected_n_col_name] &lt;- (pbeta(1,biscut_peak_loc_abslocs_unique[n,&quot;alpha&quot;],biscut_peak_loc_abslocs_unique[n,&quot;beta&quot;])-pbeta(biscut_peak_loc_abslocs_unique[n,bpe_n_col_name],biscut_peak_loc_abslocs_unique[n,&quot;alpha&quot;],biscut_peak_loc_abslocs_unique[n,&quot;beta&quot;]))*biscut_peak_loc_abslocs_unique[n,expected_n_1_col_name]\/(pbeta(biscut_peak_loc_abslocs_unique[n,bps_n_col_name],biscut_peak_loc_abslocs_unique[n,&quot;alpha&quot;],biscut_peak_loc_abslocs_unique[n,&quot;beta&quot;])-pbeta(biscut_peak_loc_abslocs_unique[n,bpe_n_1_col_name],biscut_peak_loc_abslocs_unique[n,&quot;alpha&quot;],biscut_peak_loc_abslocs_unique[n,&quot;beta&quot;]))\n      }\n    } \n  }\n}\n\nmessage(&quot;Calculating gene-level expected count of telomere\/centromere-bounded SCNA breakpoints&quot;)\n\nfor (n in 1:nrow(biscut_gene_loc_abslocs_peak)){\n  biscut_gene_loc_abslocs_peak[n,&quot;E_1_2&quot;] &lt;- 
(pbeta(biscut_gene_loc_abslocs_peak[n,&quot;NextPeakStart&quot;],biscut_gene_loc_abslocs_peak[n,&quot;alpha&quot;],biscut_gene_loc_abslocs_peak[n,&quot;beta&quot;])-pbeta(biscut_gene_loc_abslocs_peak[n,&quot;End_percent&quot;],biscut_gene_loc_abslocs_peak[n,&quot;alpha&quot;],biscut_gene_loc_abslocs_peak[n,&quot;beta&quot;]))*biscut_gene_loc_abslocs_peak[n,&quot;E_0_1&quot;]\/(pbeta(biscut_gene_loc_abslocs_peak[n,&quot;Start_percent&quot;],biscut_gene_loc_abslocs_peak[n,&quot;alpha&quot;],biscut_gene_loc_abslocs_peak[n,&quot;beta&quot;])-pbeta(biscut_gene_loc_abslocs_peak[n,&quot;PriorPeakEnd&quot;],biscut_gene_loc_abslocs_peak[n,&quot;alpha&quot;],biscut_gene_loc_abslocs_peak[n,&quot;beta&quot;])) \n}\n\nmessage(&quot;Step4 All Done!&quot;)\n\n# Step5: Divide Expected count of telomere\/centromere-bounded SCNA breakpoints with True telomere\/centromere-bounded SCNA breakpoints to infer fitness ####\ncol_names &lt;- paste0(&quot;biscut_peak&quot;, 1:biscut_peak_n,&quot;_RF&quot;)\nnew_rows_biscut_prf &lt;- as.data.frame(matrix(nrow = nrow(biscut_peak_loc_abslocs_unique), ncol = length(col_names)))\ncolnames(new_rows_biscut_prf) &lt;- col_names\nbiscut_peak_loc_abslocs_unique &lt;- cbind(biscut_peak_loc_abslocs_unique, new_rows_biscut_prf)\n\n# Calculate Peak-level Relative Fitness as (True breakpoint count)\/(Expected breakpoint count)\nfor(n in 1:nrow(biscut_peak_loc_abslocs_unique)){\n  for(peak_n in 1:max(dup_count$count)){\n    expected_n_col_name &lt;- paste0(&quot;E_&quot;, peak_n, &quot;_&quot;, peak_n+1)\n    true_n_col_name &lt;- paste0(&quot;T_&quot;, peak_n, &quot;_&quot;, peak_n+1)\n    RF_n_col_name &lt;- paste0(&quot;biscut_peak&quot;, peak_n, &quot;_RF&quot;)\n\n    biscut_peak_loc_abslocs_unique[n,RF_n_col_name] &lt;- (biscut_peak_loc_abslocs_unique[n,true_n_col_name]+0.000000001)\/(biscut_peak_loc_abslocs_unique[n,expected_n_col_name]+0.000000001)\n  }\n}\n\n# Calculate Gene-level Relative Fitness as (True breakpoint count)\/(Expected breakpoint 
count)\nbiscut_gene_loc_abslocs_peak$RF &lt;- (biscut_gene_loc_abslocs_peak$T_1_2+0.000000001)\/(biscut_gene_loc_abslocs_peak$E_1_2+0.000000001)\n\nmessage(&quot;Step5 All Done!&quot;)\nmessage(&quot;Writting Files to &quot;,output_path,&quot;\/&quot;,tumor_type)\n\nwrite.table(biscut_peak_loc_abslocs_unique,paste0(output_path,&quot;\/&quot;,tumor_type,&quot;_biscut_PANCAN_peak_RF.txt&quot;),col.names = T, row.names = F, sep = &#039;\\t&#039;, quote = F)\nwrite.table(biscut_gene_loc_abslocs_peak,paste0(output_path,&quot;\/&quot;,tumor_type,&quot;_biscut_PANCAN_gene_RF.txt&quot;),col.names = T, row.names = F, sep = &#039;\\t&#039;, quote = F)\n<\/code><\/pre>\n","protected":false},"excerpt":{"rendered":"<p>Github\u7f51\u5740 \u5b89\u88c5 conda create -n biscut conda activate biscu&#8230;<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1],"tags":[],"class_list":["post-390","post","type-post","status-publish","format-standard","hentry","category-uncategorized"],"_links":{"self":[{"href":"https:\/\/www.kz-hub.tech\/index.php\/wp-json\/wp\/v2\/posts\/390","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.kz-hub.tech\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.kz-hub.tech\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.kz-hub.tech\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.kz-hub.tech\/index.php\/wp-json\/wp\/v2\/comments?post=390"}],"version-history":[{"count":5,"href":"https:\/\/www.kz-hub.tech\/index.php\/wp-json\/wp\/v2\/posts\/390\/revisions"}],"predecessor-version":[{"id":396,"href":"https:\/\/www.kz-hub.tech\/index.php\/wp-json\/wp\/v2\/posts\/390\/revisions\/396"}],"wp:attachment":[{"href":"https:\/\/www.kz-hub.tech\/index.php\/wp-json\/wp\/v2\/media?parent=390"}],"wp:term":[{"tax
onomy":"category","embeddable":true,"href":"https:\/\/www.kz-hub.tech\/index.php\/wp-json\/wp\/v2\/categories?post=390"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.kz-hub.tech\/index.php\/wp-json\/wp\/v2\/tags?post=390"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}