Mercurial > repos > iuc > control_freec

--- a/control_freec.xml	Thu Aug 13 09:50:35 2020 -0400
+++ b/control_freec.xml	Tue Aug 18 08:52:45 2020 -0400
@@ -13,11 +13,15 @@
             samtools faidx ./genome.fa 2>&1 || echo 'Error running samtools faidx for indexing fasta reference for control-freec' >&2 &&
         #else
             ln -s '$reference_source.ref.fields.path' ./genome.fa &&
-            ln -s '${reference_source.ref.fields.path}.fai' ./genome.fa.fai &&
+            cp '${reference_source.ref.fields.path}.fai' ./genome.fa.fai &&
         #end if

         #if int($WGS_WES.advanced_settings.window_section.window) == 0
             ln -s '$WGS_WES.input_capture_file' ./capture.bed &&
+
+            cat ./capture.bed | cut -f 1 | sort | uniq > ./capture.bed_tmp &&
+            cp ./genome.fa.fai ./genome.fa.fai_tmp &&
+            awk 'NR==FNR{A[$1];next}($1 in A)' ./capture.bed_tmp ./genome.fa.fai_tmp > ./genome.fa.fai &&
         #end if

         mkdir ./chromosomes &&
@@ -42,10 +46,7 @@
         #end if

         #if $output_section.circos_data
-            && python '$__tool_directory__/ratio2circos.py'
-               -i ./output/sample.bam_ratio.BedGraph
-               -p '$WGS_WES.advanced_settings.ploidy'
-               -o sample.bam_ratio_log2_circos.txt
+            && python '$__tool_directory__/ratio2circos.py' '$WGS_WES.advanced_settings.ploidy'
         #end if
     ]]></command>
     <configfiles>
@@ -159,7 +160,10 @@
         <data name="out_gc_profile" format="tabular" label="${tool.name} on ${on_string}: GC-content profile" from_work_dir="output/GC_profile.targetedRegions.cnp">
             <filter>int(WGS_WES['advanced_settings']['window_section']['window']) == 0</filter>
         </data>
-        <data name="out_ratio_log2_circos" format="tabular" label="${tool.name} on ${on_string}: Circos 2D-track data" from_work_dir="output/sample.bam_ratio_log2_circos.txt">
+        <data name="out_ratio_log2_circos" format="tabular" label="${tool.name} on ${on_string}: Circos Log2 Ratio (2D Data Track)" from_work_dir="output/sample.bam_ratio_log2_circos.txt">
+            <filter>output_section['circos_data']</filter>
+        </data>
+        <data name="out_chr_sorted_circos" format="tabular" label="${tool.name} on ${on_string}: Circos Karyotype" from_work_dir="output/karyotype_circos.txt">
             <filter>output_section['circos_data']</filter>
         </data>
     </outputs>
--- a/macros.xml	Thu Aug 13 09:50:35 2020 -0400
+++ b/macros.xml	Tue Aug 18 08:52:45 2020 -0400
@@ -1,5 +1,5 @@
 <macros>
-    <token name="@WRAPPER_VERSION@">@TOOL_VERSION@+galaxy0</token>
+    <token name="@WRAPPER_VERSION@">@TOOL_VERSION@+galaxy1</token>
     <token name="@TOOL_VERSION@">11.6</token>
     <xml name="reference_interface">
         <conditional name="reference_source">
@@ -40,6 +40,8 @@
             <param name="window" type="integer" value="50000" label="Explicit window size" help="Higher priority than coefficientOfVariation. Ex: for whole genome sequencing: &quot;50000&quot;; for whole exome sequencing: &quot;0&quot;" />
             <param name="step" type="integer" value="10000" label="Step" help="Used only when &quot;window&quot; is specified. Do not use for exome sequencing (instead set &quot;0&quot;). Ex: 10000" />
         </section>
+        <param name="printNA" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE" label="Print NA to avoid &quot;-1&quot;" help="Set &quot;No&quot; to avoid printing &quot;-1&quot; to the _ratio.txt files. Useful for exome-seq or targeted sequencing data." />
+        <param name="noisyData" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="Noisy Data" help="Set &quot;Yes&quot; for target resequencing data (e.g., exome-seq) to avoid false positive predictions due to non-uniform capture" />
     </xml>
     <xml name="WES">
         <param name="degree" type="select" label="Degree of polynomial" help="">
@@ -61,6 +63,8 @@
              <param name="window" type="integer" value="0" label="Explicit window size" help="Higher priority than coefficientOfVariation. Ex: for whole genome sequencing: &quot;50000&quot;; for whole exome sequencing: &quot;0&quot;" />
              <param name="step" type="integer" value="0" label="Step" help="Used only when &quot;window&quot; is specified. Do not use for exome sequencing (instead set &quot;0&quot;). Ex: 10000" />
         </section>
+        <param name="printNA" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="Print NA to avoid &quot;-1&quot;" help="Set &quot;No&quot; to avoid printing &quot;-1&quot; to the _ratio.txt files. Useful for exome-seq or targeted sequencing data." />
+        <param name="noisyData" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE" label="Noisy Data" help="Set &quot;Yes&quot; for target resequencing data (e.g., exome-seq) to avoid false positive predictions due to non-uniform capture" />
     </xml>
     <xml name="other">
         <param name="degree" type="select" label="Degree of polynomial" help="">
@@ -82,6 +86,8 @@
             <param name="window" type="integer" value="50000" label="Explicit window size" help="Higher priority than coefficientOfVariation. Ex: for whole genome sequencing: &quot;50000&quot;; for whole exome sequencing: &quot;0&quot;" />
             <param name="step" type="integer" value="10000" label="Step" help="Used only when &quot;window&quot; is specified. Do not use for exome sequencing (instead set &quot;0&quot;). Ex: 10000" />
         </section>
+        <param name="printNA" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE" label="Print NA to avoid &quot;-1&quot;" help="Set &quot;No&quot; to avoid printing &quot;-1&quot; to the _ratio.txt files. Useful for exome-seq or targeted sequencing data." />
+        <param name="noisyData" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="Noisy Data" help="Set &quot;Yes&quot; for target resequencing data (e.g., exome-seq) to avoid false positive predictions due to non-uniform capture" />
     </xml>
     <xml name="shared">
         <!-- general parameters -->
@@ -105,9 +111,7 @@
             <param name="minMappabilityPerWindow" type="float" label="Minimal mappability per window" value="0.85" min="0" max="1" help="Only windows with fraction of mappable positions higher than or equal to this threshold will be considered (if &quot;gemMappabilityFile&quot; is not provided, one uses the percentage of non-N letters per window)" />
             <param name="minExpectedGC" type="float" label="Minimal expected value of the GC-content" value="0.35" min="0" max="1" help="Minimal expected value of the GC-content for the prior evaluation of &quot;Read Count ~ GC-content&quot; dependency. Change only if you run Control-FREEC on a bacterial genome." />
             <param name="maxExpectedGC" type="float" label="Maximal expected value of the GC-content" value="0.55" min="0" max="1" help="Maximal expected value of the GC-content for the prior evaluation of &quot;Read Count ~ GC-content&quot; dependency. Change only if you run Control-FREEC on a bacterial genome." />
-            <param name="noisyData" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="Noisy Data" help="Set &quot;Yes&quot; for target resequencing data (e.g., exome-seq) to avoid false positive predictions due to non-uniform capture" />
             <param name="ploidy" type="text" value="2" label="Genome ploidy" help="In case of doubt, you can set different values and Control-FREEC will select the one that explains most observed CNAs. Ex: 2 or 2,3,4" />
-            <param name="printNA" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE" label="Print NA to avoid &quot;-1&quot;" help="Set &quot;No&quot; to avoid printing &quot;-1&quot; to the _ratio.txt files. Useful for exome-seq or targeted sequencing data." />
             <param name="sex" type="select" label="Sample sex" help="&quot;XX&quot; will exclude chr Y from the analysis. &quot;XY&quot; will not annotate one copy of chr X and Y as a loss.">
                 <option value="XY" selected="True">XY</option>
                 <option value="XX">XX</option>
--- a/ratio2circos.py	Thu Aug 13 09:50:35 2020 -0400
+++ b/ratio2circos.py	Tue Aug 18 08:52:45 2020 -0400
@@ -1,20 +1,38 @@
-import argparse
 import math
-import os
+import sys

-parser = argparse.ArgumentParser()
-parser.add_argument('-i', '--input', required=True, default='./output/sample.bam_ratio.BedGraph', type=str)
-parser.add_argument('-o', '--output', required=True, default='./output/sample.bam_ratio_log2_circos.txt', type=str)
-parser.add_argument('-p', '--ploidy', required=True, default=2, type=int)
-args = parser.parse_args()
+# Ploidy is the only command-line argument; the input and output paths used
+# below are fixed relative to the working directory.
+# NOTE(review): the wrapper's "ploidy" parameter is free text whose help allows
+# a list such as "2,3,4"; int() raises ValueError on that input - confirm how
+# multi-ploidy runs should be handled.
+ploidy = int(sys.argv[1])

-path = os.path.dirname(args.input)
-output = os.path.join(path, args.output)
+# Write the Circos log2-ratio track: one row per BedGraph interval with
+# log2(value / ploidy) in the fourth column.
+with open("./output/sample.bam_ratio.BedGraph") as bed:
+    with open("./output/sample.bam_ratio_log2_circos.txt", "w") as olog2r:
+        for line in bed:
+            ls = line.split()
+            # Skip blank lines and "track" header lines.
+            if not ls or ls[0] == "track":
+                continue
+            ratio = float(ls[3])
+            # log2 is undefined for non-positive values, so those rows are dropped.
+            if ratio > 0:
+                log2_ratio = math.log2(ratio / ploidy)
+                olog2r.write("{}\t{}\t{}\t{}\n".format(ls[0], ls[1], ls[2], log2_ratio))

-with open(args.input) as file:
-    for line in file.readlines():
-        ls = line.split()
-        if ls[0] != "track" and float(ls[3]) > 0:
-            log2_ratio = math.log2(float(ls[3]) / args.ploidy)
-            with open(output, "a") as out:
-                out.write("{}\t{}\t{}\t{}\n".format(ls[0], ls[1], ls[2], log2_ratio))
+# Write the Circos karyotype: one "chr" row per sequence in the FASTA index,
+# using its name (column 1) and length (column 2).
+with open("./genome.fa.fai") as fai:
+    with open("./output/karyotype_circos.txt", "w") as ochr:
+        for line in fai:
+            ls = line.split()
+            if not ls:
+                continue
+            # Remove only a leading "chr" prefix. str.strip("chr") treats its
+            # argument as a character set and would also eat leading/trailing
+            # 'c', 'h' or 'r' (e.g. "hs37d5" would become "s37d5").
+            label = ls[0][3:] if ls[0].startswith("chr") else ls[0]
+            ochr.write("chr - {}\t{}\t0\t{}\t{}\n".format(ls[0], label.lower(), ls[1], ls[0]))