comparison sigmut.xml @ 1:02861b32a62f draft default tip

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sigmut commit bba3eb3950b8772758cc6f19747172be7413ddd9"
author artbio
date Sun, 14 Jun 2020 20:27:29 -0400
parents 9f48c5d97be8
children
comparison
equal deleted inserted replaced
0:9f48c5d97be8 1:02861b32a62f
1 <tool id="SigProfiler" name="SigProfiler" version="@VERSION@"> 1 <tool id="SigProfiler" name="SigProfiler" version="@VERSION@">
2 <description>SigProfiler performs the mutational signature characterization from VCF files.</description> 2 <description>performs mutational signature characterization from variant files</description>
3 3
4 <macros> 4 <macros>
5 <import>sigmut_macros.xml</import> 5 <import>sigmut_macros.xml</import>
6 </macros> 6 </macros>
7 <expand macro="requirements"/> 7 <expand macro="requirements"/>
8 <expand macro="stdio"/> 8 <expand macro="stdio"/>
9 <command detect_errors="exit_code"><![CDATA[ 9 <command detect_errors="exit_code"><![CDATA[
10 @VERSION@ 10 @VERSION@
11 @pipefail@ 11 @pipefail@
12 #import os 12 BIN=`which sigprofiler | sed 's,/sigprofiler,,g'` &&
13 #import random 13 echo \$BIN &&
14 #import datetime 14 chmod -R 777 \$BIN &&
15 #set job_dir=os.getcwd() 15 mkdir run_dir &&
16 #set run_dir = job_dir + (' ' + str(random.randint(1,100000))).strip() + '/' 16 #if str( $set_analysis.choices ) == "get_sigmut":
17 #set job_num = "Job_" + (' ' + str(random.randint(1,500))).strip() 17 #if str( $set_analysis.vcfile_input.vcfile ) == "maf":
18 18 #set $infile = 'run_dir/snps.maf'
19 mkdir $run_dir &&
20
21 #if str( $set_analysis.choices ) == "install_genome":
22 ln -s -f '$__tool_directory__/install.log' '${logref}' &&
23 #else if str( $set_analysis.choices ) == "get_sigmut":
24 #set err_file = $run_dir + "logs/SigProfilerMatrixGenerator_" + $job_num + "_" + str($set_analysis.refgendat) + str(datetime.date.today()) + ".err"
25 #set log_file = $run_dir + "logs/SigProfilerMatrixGenerator_" + $job_num + "_" + str($set_analysis.refgendat) + str(datetime.date.today()) + ".out"
26
27 #set plot1 = $run_dir + "output/plots/SBS_6_plots_" + $job_num + ".pdf"
28 #set plot2 = $run_dir + "output/plots/SBS_24_plots_" + $job_num + ".pdf"
29 #set plot3 = $run_dir + "output/plots/SBS_78_plots_" + $job_num + ".pdf"
30 #set plot4 = $run_dir + "output/plots/SBS_96_plots_" + $job_num + ".pdf"
31 #set plot5 = $run_dir + "output/plots/SBS_384_plots_" + $job_num + ".pdf"
32 #set plot6 = $run_dir + "output/plots/SBS_1536_plots_" + $job_num + ".pdf"
33 #set plot7 = $run_dir + "output/plots/DBS_78_plots_" + $job_num + ".pdf"
34 #set plot8 = $run_dir + "output/plots/DBS_186_plots_" + $job_num + ".pdf"
35
36 #set plot9 = $run_dir + "output/plots/ID_simple_plots_" + $job_num + ".pdf"
37 #set plot10 = $run_dir + "output/plots/ID_TSB_plots_" + $job_num + ".pdf"
38 #set plot11 = $run_dir + "output/plots/ID_83_plots_" + $job_num + ".pdf"
39 #set plot12 = $run_dir + "output/plots/ID_94_plots_" + $job_num + ".pdf"
40 #set plot13 = $run_dir + "output/plots/ID_96_plots_" + $job_num + ".pdf"
41
42 #set exo1 = $run_dir + "output/DBS/" + $job_num + ".DBS78" + ".exome"
43 #set exo2 = $run_dir + "output/DBS/" + $job_num + ".DBS186" + ".exome"
44 #set exo3 = $run_dir + "output/DBS/" + $job_num + ".DBS1248" + ".exome"
45 #set exo4 = $run_dir + "output/DBS/" + $job_num + ".DBS2976" + ".exome"
46
47 #set exo5 = $run_dir + "output/SBS/" + $job_num + ".SBS6" + ".exome"
48 #set exo6 = $run_dir + "output/SBS/" + $job_num + ".SBS24" + ".exome"
49 #set exo7 = $run_dir + "output/SBS/" + $job_num + ".SBS96" + ".exome"
50 #set exo8 = $run_dir + "output/SBS/" + $job_num + ".SBS384" + ".exome"
51 #set exo9 = $run_dir + "output/SBS/" + $job_num + ".SBS1536" + ".exome"
52 #set exo10 = $run_dir + "output/SBS/" + $job_num + ".SBS6144" + ".exome"
53
54 #set exo11 = $run_dir + "output/vcf_files/DBS/" + $job_num + "_" + "DBS_exome.vcf"
55 #set exo12 = $run_dir + "output/vcf_files/SNV/" + $job_num + "_" + "SNV_exome.vcf"
56
57 #set tsb1 = $run_dir + "output/TSB/strandBiasTest_24.txt"
58 #set tsb2 = $run_dir + "output/TSB/strandBiasTest_384.txt"
59 #set tsb3 = $run_dir + "output/TSB/strandBiasTest_6144.txt"
60 #set tsb4 = $run_dir + "output/TSB/significantResults_strandBiasTest.txt"
61
62 #set seqinf1 = $run_dir + "output/DBS/" + $job_num + ".DBS78" + ".all"
63 #set seqinf2 = $run_dir + "output/DBS/" + $job_num + ".DBS186" + ".all"
64 #set seqinf3 = $run_dir + "output/DBS/" + $job_num + ".DBS1248" + ".all"
65 #set seqinf4 = $run_dir + "output/DBS/" + $job_num + ".DBS2976" + ".all"
66
67 #set seqinf5 = $run_dir + "output/SBS/" + $job_num + ".SBS6" + ".all"
68 #set seqinf6 = $run_dir + "output/SBS/" + $job_num + ".SBS24" + ".all"
69 #set seqinf7 = $run_dir + "output/SBS/" + $job_num + ".SBS96" + ".all"
70 #set seqinf8 = $run_dir + "output/SBS/" + $job_num + ".SBS384" + ".all"
71 #set seqinf9 = $run_dir + "output/SBS/" + $job_num + ".SBS1536" + ".all"
72 #set seqinf10 = $run_dir + "output/SBS/" + $job_num + ".SBS6144" + ".all"
73
74 #set seqinf11 = $run_dir + "output/ID/" + $job_num + ".ID28" + ".all"
75 #set seqinf12 = $run_dir + "output/ID/" + $job_num + ".ID83" + ".all"
76 #set seqinf13 = $run_dir + "output/ID/" + $job_num + ".ID94" + ".all"
77 #set seqinf14 = $run_dir + "output/ID/" + $job_num + ".ID96" + ".all"
78 #set seqinf15 = $run_dir + "output/ID/" + $job_num + ".ID415" + ".all"
79 #set seqinf16 = $run_dir + "output/ID/" + $job_num + ".ID8628" + ".all"
80
81 ln -s -f '$log_file' '${logsmt}' &&
82
83 #if str($set_analysis.plot) == "true":
84 ln -s -f '$plot1' '${SBS6}' &&
85 ln -s -f '$plot2' '${SBS24}' &&
86 ln -s -f '$plot3' '${SBS78}' &&
87 ln -s -f '$plot4' '${SBS96}' &&
88 ln -s -f '$plot5' '${SBS384}' &&
89 ln -s -f '$plot6' '${SBS1536}' &&
90 ln -s -f '$plot7' '${DBS78}' &&
91 ln -s -f '$plot8' '${DBS186}' &&
92 ln -s -f '$plot9' '${ID_simple}' &&
93 ln -s -f '$plot10' '${ID_TSB}' &&
94 ln -s -f '$plot11' '${ID_83}' &&
95 ln -s -f '$plot12' '${ID_94}' &&
96 ln -s -f '$plot13' '${ID_96}' &&
97
98 ln -s -f '$seqinf1' '${SeqInf1}' &&
99 ln -s -f '$seqinf2' '${SeqInf2}' &&
100 ln -s -f '$seqinf3' '${SeqInf3}' &&
101 ln -s -f '$seqinf4' '${SeqInf4}' &&
102 ln -s -f '$seqinf5' '${SeqInf5}' &&
103 ln -s -f '$seqinf6' '${SeqInf6}' &&
104 ln -s -f '$seqinf7' '${SeqInf7}' &&
105 ln -s -f '$seqinf8' '${SeqInf8}' &&
106 ln -s -f '$seqinf9' '${SeqInf9}' &&
107 ln -s -f '$seqinf10' '${SeqInf10}' &&
108
109 ln -s -f '$seqinf11' '${SeqInf11}' &&
110 ln -s -f '$seqinf12' '${SeqInf12}' &&
111 ln -s -f '$seqinf13' '${SeqInf13}' &&
112 ln -s -f '$seqinf14' '${SeqInf14}' &&
113 ln -s -f '$seqinf15' '${SeqInf15}' &&
114 ln -s -f '$seqinf16' '${SeqInf16}' &&
115
116 #end if
117
118 #if str($set_analysis.exome) == "true":
119 ln -s -f '$exo1' '${Exo1}' &&
120 ln -s -f '$exo2' '${Exo2}' &&
121 ln -s -f '$exo3' '${Exo3}' &&
122 ln -s -f '$exo4' '${Exo4}' &&
123 ln -s -f '$exo5' '${Exo5}' &&
124 ln -s -f '$exo6' '${Exo6}' &&
125 ln -s -f '$exo7' '${Exo7}' &&
126 ln -s -f '$exo8' '${Exo8}' &&
127 ln -s -f '$exo9' '${Exo9}' &&
128 ln -s -f '$exo10' '${Exo10}' &&
129 ln -s -f '$exo11' '${Exo11}' &&
130 ln -s -f '$exo12' '${Exo12}' &&
131 #end if
132
133 #if str($set_analysis.tsb_stat) == "true":
134 ln -s -f '$tsb1' '${TSB24}' &&
135 ln -s -f '$tsb2' '${TSB384}' &&
136 ln -s -f '$tsb3' '${TSB6144}' &&
137 ln -s -f '$tsb4' '${sigRes}' &&
138 #end if
139
140
141 #if str($set_analysis.seqInfo) == "true":
142 ln -s -f '$seqinf1' '${SeqInf1}' &&
143 ln -s -f '$seqinf2' '${SeqInf2}' &&
144 ln -s -f '$seqinf3' '${SeqInf3}' &&
145 ln -s -f '$seqinf4' '${SeqInf4}' &&
146 ln -s -f '$seqinf5' '${SeqInf5}' &&
147 ln -s -f '$seqinf6' '${SeqInf6}' &&
148 ln -s -f '$seqinf7' '${SeqInf7}' &&
149 ln -s -f '$seqinf8' '${SeqInf8}' &&
150 ln -s -f '$seqinf9' '${SeqInf9}' &&
151 ln -s -f '$seqinf10' '${SeqInf10}' &&
152 ln -s -f '$seqinf11' '${SeqInf11}' &&
153 ln -s -f '$seqinf12' '${SeqInf12}' &&
154 ln -s -f '$seqinf13' '${SeqInf13}' &&
155 ln -s -f '$seqinf14' '${SeqInf14}' &&
156 ln -s -f '$seqinf15' '${SeqInf15}' &&
157 ln -s -f '$seqinf16' '${SeqInf16}' &&
158 #end if
159
160
161 #if str( $set_analysis.vcfile_input.vcfile ) == "maf":
162 #set $infile = $run_dir + 'snps.maf'
163 ln -s -f '$set_analysis.vcfile_input.maf_file' '$infile' && 19 ln -s -f '$set_analysis.vcfile_input.maf_file' '$infile' &&
164 #else if str( $set_analysis.vcfile_input.vcfile ) == "icgc": 20 #else if str( $set_analysis.vcfile_input.vcfile ) == "icgc":
165 #set $infile = $run_dir + 'snps.txt' 21 #set $infile = 'run_dir/snps.txt'
166 ln -s -f '$set_analysis.vcfile_input.icgc_file' '$infile' && 22 ln -s -f '$set_analysis.vcfile_input.icgc_file' '$infile' &&
167 #else if str( $set_analysis.vcfile_input.vcfile ) == "vcf": 23 #else if str( $set_analysis.vcfile_input.vcfile ) == "vcf":
168 #set $infile = $run_dir + 'snps.vcf' 24 #set $infile = 'run_dir/snps.vcf'
169 ln -s -f '$set_analysis.vcfile_input.vcf_file' '$infile' && 25 ln -s -f '$set_analysis.vcfile_input.vcf_file' '$infile' &&
170 #end if 26 #end if
171 27 #end if
172 #end if 28
173 29 sigprofiler
174 sigprofiler 30
175 #if str( $set_analysis.choices ) == "install_genome": 31 #if str( $set_analysis.choices ) == "install_genome":
176 -ig $set_analysis.refgendwn 32 -ig $set_analysis.refgendwn > install.log
177 #else if str( $set_analysis.choices ) == "get_sigmut": 33 #else if str( $set_analysis.choices ) == "get_sigmut":
178 -n $job_num 34 -g $set_analysis.refgendat
179 -g $set_analysis.refgendat 35 -f 'run_dir'
180 -f $run_dir 36 -n "project"
181 37 -p
182 #if str( $set_analysis.exome ) == "true": 38 ## ! implement exome functionality when good test available
183 -e 39 ## #if str( $set_analysis.exome ) == "true":
184 #end if 40 ## -e
185 #if str( $set_analysis.chrom_based ) == "true": 41 ## #end if
186 -c 42 ## ! implement per chromosome functionality when good test available
187 #end if 43 ## #if str( $set_analysis.chrom_based ) == "true":
188 #if str( $set_analysis.plot ) == "true": 44 ## -c
189 -p 45 ## #end if
190 #end if 46 #if str( $set_analysis.tsb_stat ) == "true":
191 #if str( $set_analysis.tsb_stat ) == "true": 47 -t
192 -t 48 #end if
193 #end if 49 #if str( $set_analysis.gs ) == "true":
194 #if str( $set_analysis.gs ) == "true": 50 -s
195 -s 51 #end if
196 #end if 52 ##-b $set_analysis.bed ### to be done
197 ##-b $set_analysis.bed 53 && pdfcombine -f -s -o blinder.pdf run_dir/output/plots/*.pdf
198 #end if 54 && ls run_dir/logs/
199 ]]></command> 55 #if str( $set_analysis.tsb_stat ) == "true":
200 56 && tail -n +1 run_dir/output/TSB/*.txt > transcriptional_strand_biases.txt
201 <inputs> 57 #end if
202 <conditional name="set_analysis"> 58 #if $set_analysis.seqInfo:
203 <param name="choices" type="select" label="Which of the following jobs do you want perform?"> 59 && tail -n +1 run_dir/output/*/*.all > information.txt
204 <option value="install_genome">Install 'de novo' a reference genome </option> 60 #end if
205 <option value="get_sigmut">Obtain the mutational signatures from VCF files</option> 61 #end if
206 </param> 62 ]]></command>
207 <when value="install_genome"> 63
208 <param name="refgendwn" type="select" label="Reference genome" help="Get data from any of the following reference genomes:"> 64 <inputs>
209 <option value="GRCh37">Homo sapiens, GRCh37.p13 [GCA_000001405.14] </option> 65 <conditional name="set_analysis">
210 <option value="GRCh38">Homo sapiens, GRCh38.p12 [GCA_000001405.27] </option> 66 <param name="choices" type="select" label="Which of the following jobs do you want perform?">
211 <option value="mm9">Mus musculus, GRCm37 [GCA_000001635.18]</option> 67 <option value="install_genome">Install 'de novo' a reference genome </option>
212 <option value="mm10">Mus musculus, GRCm38.p6 [GCA_000001635.8]</option> 68 <option value="get_sigmut">Obtain the mutational signatures from VCF files</option>
213 <option value="rn6">Rattus norvegicus, Rnor_6.0 [GCA_000001895.4]</option> 69 </param>
214 <option value="c_elegans">Caenorhabditis elegans</option> 70 <when value="install_genome">
215 <option value="dog">Dog</option> 71 <param name="refgendwn" type="select" label="Reference genome" help="Get data from any of the following reference genomes:">
216 </param> 72 <option value="GRCh37">Homo sapiens, GRCh37.p13 [GCA_000001405.14] </option>
217 </when> 73 <option value="GRCh38">Homo sapiens, GRCh38.p12 [GCA_000001405.27] </option>
218 74 <option value="mm9">Mus musculus, GRCm37 [GCA_000001635.18]</option>
219 <when value="get_sigmut"> 75 <option value="mm10">Mus musculus, GRCm38.p6 [GCA_000001635.8]</option>
220 <conditional name="vcfile_input"> 76 <option value="rn6">Rattus norvegicus, Rnor_6.0 [GCA_000001895.4]</option>
221 <param name="vcfile" type="select" label="VC file" help="Select the format of your input data"> 77 <option value="c_elegans">Caenorhabditis elegans</option>
222 <option value="maf">Mutation Annotation Format</option> 78 <option value="dog">Dog</option>
223 <option value="icgc">Tab-separated file</option> 79 </param>
224 <option value="vcf">Variant Call Format</option> 80 </when>
225 </param> 81
226 <when value='maf'> 82 <when value="get_sigmut">
227 <param name="maf_file" type="data" format="maf" label="select VC file" help="Select the input file in MAF format." /> 83 <conditional name="vcfile_input">
228 </when> 84 <param name="vcfile" type="select" label="VC file" help="Select the format of your input data">
229 <when value='icgc'> 85 <option value="maf">Mutation Annotation Format</option>
230 <param name="icgc_file" type="data" format="txt" label="select VC file" help="Select the input file in ICGC format." /> 86 <option value="icgc">Tab-separated file</option>
231 </when> 87 <option value="vcf">Variant Call Format</option>
232 <when value='vcf'> 88 </param>
233 <param name="vcf_file" type="data" format="vcf" label="select VC file" help="Select the input file in VCF format." /> 89 <when value='maf'>
234 </when> 90 <param name="maf_file" type="data" format="maf" label="select VC file" help="Select the input file in MAF format." />
235 </conditional> 91 </when>
236 92 <when value='icgc'>
237 <param name="refgendat" type="select" label="Reference genome to be analyzed" help="Use the following reference genome:"> 93 <param name="icgc_file" type="data" format="txt" label="select VC file" help="Select the input file in ICGC format." />
238 <option value="GRCh37">Homo sapiens, GRCh37.p13 [GCA_000001405.14] </option> 94 </when>
95 <when value='vcf'>
96 <param name="vcf_file" type="data" format="vcf" label="select VC file" help="Select the input file in VCF format." />
97 </when>
98 </conditional>
99
100 <param name="refgendat" type="select" label="Reference genome to be analyzed" help="Use the following reference genome:">
101 <option value="GRCh37">Homo sapiens, GRCh37.p13 [GCA_000001405.14] </option>
239 <option value="GRCh38">Homo sapiens, GRCh38.p12 [GCA_000001405.27] </option> 102 <option value="GRCh38">Homo sapiens, GRCh38.p12 [GCA_000001405.27] </option>
240 <option value="mm9">Mus musculus, GRCm37 [GCA_000001635.18]</option> 103 <option value="mm9">Mus musculus, GRCm37 [GCA_000001635.18]</option>
241 <option value="mm10">Mus musculus, GRCm38.p6 [GCA_000001635.8]</option> 104 <option value="mm10">Mus musculus, GRCm38.p6 [GCA_000001635.8]</option>
242 <option value="rn6">Rattus norvegicus, Rnor_6.0 [GCA_000001895.4]</option> 105 <option value="rn6">Rattus norvegicus, Rnor_6.0 [GCA_000001895.4]</option>
243 <option value="c_elegans">Caenorhabditis elegans</option> 106 <option value="c_elegans">Caenorhabditis elegans</option>
244 <option value="dog">Dog</option> 107 <option value="dog">Dog</option>
245 </param> 108 </param>
246 109
247 <conditional name="bed_input"> 110 <!-- implement bed when test available -->
248 <param name="bedfile" type="select" label="BED file" help="Input a BED file"> 111 <!-- <conditional name="bed_input">
249 <option value="yes">Yes</option> 112 <param name="bedfile" type="select" label="BED file" help="Input a BED file">
250 <option value="no" selected="true">No</option> 113 <option value="yes">Yes</option>
251 </param> 114 <option value="no" selected="true">No</option>
252 <when value='yes'> 115 </param>
253 <param name="bed_file" format="bed" type="data" label="Use a BED file containing the set of regions" help="Provide a BED file"/> 116 <when value='yes'>
254 </when> 117 <param name="bed_file" format="bed" type="data" label="Use a BED file containing the set of regions" help="Provide a BED file"/>
255 <when value='no'> 118 </when>
256 </when> 119 <when value='no'>
257 </conditional> 120 </when>
258 <param name="plot" type="boolean" truevalue="true" label="Produce plot results?" checked="False" help="Show plots"/> 121 </conditional> -->
259 <param name="exome" type="boolean" label="Use only the exome?" checked="False" help="Use exome"/> 122 <!-- implement exome functionality when test available -->
260 <param name="chrom_based" type="boolean" label="Create the matrices on a per chromosome basis?" checked="False" help="Show snvs"/> 123 <!-- <param name="exome" type="boolean" label="Use only the exome?" checked="False" help="Use exome"/> -->
261 <param name="tsb_stat" type="boolean" truevalue="true" label="Performs a transcriptional strand bias test?" checked="False" help="Show snvs"/> 124 <!-- implement chrom_based functionality when test available -->
262 <param name="seqInfo" type="boolean" truevalue="true" label="Export sequence information?" checked="False" help="Show sequence information"/> 125 <!--<param name="chrom_based" type="boolean" label="Create the matrices on a per chromosome basis?" checked="False" help="Show snvs"/> -->
263 <param name="gs" type="boolean" label="Performs gene strand bias test?" checked="False" help="Show snvs"/> 126 <param name="tsb_stat" type="boolean" truevalue="true" label="Performs a transcriptional strand bias test?" checked="False" help="Show snvs"/>
264 </when> 127 <param name="seqInfo" type="boolean" truevalue="true" label="Export sequence information?" checked="False" help="Show sequence information"/>
265 </conditional> 128 <param name="gs" type="boolean" label="Performs gene strand bias test?" checked="False" help="Show snvs"/>
266 </inputs> 129 </when>
267 130 </conditional>
268 <outputs> 131 </inputs>
269 <data format="txt" name="logref" label="Log file: Install a Reference Genome"> 132
270 <filter>set_analysis['choices'] == 'install_genome'</filter> 133 <outputs>
271 </data> 134 <data format="txt" name="logref" label="Log file: Install a Reference Genome"
272 135 from_work_dir="./install.log">
273 <data format="txt" name="logsmt" label="Log file: Calculate Mutational Signatures"> 136 <filter>set_analysis['choices'] == 'install_genome'</filter>
274 <filter>set_analysis['choices'] == 'get_sigmut'</filter> 137 </data>
275 </data> 138 <data format="txt" name="logsmt" label="Log file: Calculate Mutational Signatures"
276 139 from_work_dir="run_dir/logs/SigProfilerMatrixGenerator*.out">
277 <data format="pdf" name="SBS6" label="--> Plot SBS 6 Sig. Mut."> 140 <filter>set_analysis['choices'] == 'get_sigmut'</filter>
278 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['plot'] is True</filter> 141 </data>
279 </data> 142
280 143 <data format="pdf" name="blinder" label="SBS Mutational Signatures plots (pdf)"
281 <data format="pdf" name="SBS24" label="--> Plot SBS 24 Sig. Mut."> 144 from_work_dir="./blinder.pdf" >
282 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['plot'] is True</filter> 145 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['plot'] is True</filter>
283 </data> 146 </data>
284 147
285 <data format="pdf" name="SBS78" label="--> Plot SBS 78 Sig. Mut."> 148 <!-- implement exome outputs when test available -->
286 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['plot'] is True</filter> 149 <!--
287 </data> 150 <data format="txt" name="dbs_exome" label="DBS_exome.vcf">
288 151 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter>
289 <data format="pdf" name="SBS96" label="--> Plot SBS 96 Sig. Mut."> 152 </data>
290 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['plot'] is True</filter> 153 <data format="txt" name="snv_exome" label="SNV_exome.vcf">
291 </data> 154 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter>
292 155 </data>
293 <data format="pdf" name="SBS384" label="--> Plot SBS 384 Sig. Mut."> 156
294 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['plot'] is True</filter> 157 <data format="txt" name="sig_exome" label="DBS 78 and so on Sig. Mut. EXOME">
295 </data> 158 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter>
296 159 </data>
297 <data format="pdf" name="SBS1536" label="--> Plot SBS 1536 Sig. Mut."> 160 -->
298 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['plot'] is True</filter> 161 <data format="txt" name="tsb" label="Transcriptional Strand Biases"
299 </data> 162 from_work_dir="./transcriptional_strand_biases.txt" >
300 163 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['tsb_stat'] is True</filter>
301 <data format="pdf" name="DBS78" label="--> Plot DBS 78 Sig. Mut."> 164 </data>
302 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['plot'] is True</filter> 165
303 </data> 166 <data format="txt" name="seqinfo" label="Mutational Signature detailed infos"
304 167 from_work_dir="./information.txt" >
305 <data format="pdf" name="DBS186" label="--> Plot DBS 186 Sig. Mut."> 168 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter>
306 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['plot'] is True</filter> 169 </data>
307 </data> 170
308 171 </outputs>
309 <data format="pdf" name="ID_simple" label="--> Plot ID simple Sig. Mut."> 172 <tests>
310 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['plot'] is True</filter> 173 <test>
311 </data> 174 <param name="choices" value="install_genome"/>
312 175 <param name="refgendwn" value="GRCh38"/>
313 <data format="pdf" name="ID_TSB" label="--> Plot ID TSB Sig. Mut."> 176 <output name="logref" file="hg38_install.log" lines_diff="5"/>
314 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['plot'] is True</filter> 177 </test>
315 </data> 178 <test>
316 179 <param name="choices" value="get_sigmut"/>
317 <data format="pdf" name="ID_83" label="--> Plot ID 83 Sig. Mut."> 180 <param name="refgendat" value="GRCh38"/>
318 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['plot'] is True</filter> 181 <param name="vcfile" value="vcf"/>
319 </data> 182 <param name="vcf_file" ftype="vcf" value="hg38.vcf"/>
320 183 <param name="plot" value="True"/>
321 <data format="pdf" name="ID_94" label="--> Plot ID 94 Sig. Mut."> 184 <output name="logsmt" ftype="txt" file="sigmut.log" lines_diff="5" />
322 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['plot'] is True</filter> 185 <output name="blinder" file="hg38_blinder.pdf" lines_diff="5" />
323 </data> 186 </test>
324 187 </tests>
325 <data format="pdf" name="ID_96" label="--> Plot ID 96 Sig. Mut."> 188
326 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['plot'] is True</filter> 189 <help><![CDATA[
327 </data> 190
328 191 **SigProfiler**
329 <data format="txt" name="Exo11" label="--> DBS_exome.vcf"> 192
330 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter> 193 Background:
331 </data> 194
332 195 Cancer genomes evince somatic mutations, which are imprinted by
333 <data format="txt" name="Exo12" label="--> SNV_exome.vcf"> 196 different mutational processes, that give rise to diverse
334 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter> 197 mutational signatures. Their analysis from single base
335 </data> 198 substitutions and their immediate sequencing context, allows the
336 199 classification of small mutational events (including
337 <data format="txt" name="Exo1" label="--> DBS 78 Sig. Mut. EXOME"> 200 substitutions, insertions, deletions, and doublet substitutions)
338 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter> 201 for better understanding the mutational processes that have
339 </data> 202 shaped a cancer genome.
340 203
341 <data format="txt" name="Exo2" label="--> DBS 186 Sig. Mut. EXOME"> 204 In this sense, SigProfiler constitutes a Galaxy-based wrapper of
342 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter> 205 a computational method developed by Ludmil B. Alexandrov, that
343 </data> 206 allows the exploration and visualization of mutational patterns
344 207 for all types of small mutational events. Specifically, the
345 <data format="txt" name="Exo3" label="--> DBS 1248 Sig. Mut. EXOME"> 208 following actions can be performed using SigProfiler wrapper:
346 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter> 209
347 </data> 210 1. Identify and categorize the mutations based on possible
348 211 single nucleotide variants (SNVs), double base substitutions
349 <data format="txt" name="Exo4" label="--> DBS 2976 Sig. Mut. EXOME"> 212 (DBS), and insertions/deletions and provide further
350 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter> 213 transcriptional strand bias categorization. Afterwards, the
351 </data> 214 classification of these mutations is integrated into distinct
352 215 matrices.
353 <data format="txt" name="Exo5" label="--> SBS 6 Sig. Mut. EXOME"> 216 SigProfiler provides matrix generation support for SBS-6,
354 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter> 217 SBS-96, SBS-1536, DBS-78 and DBS-1248. In addition, the
355 </data> 218 generation of mutational matrices of indels including
356 219 ID-28 and ID-83 is supported. Besides, an ID-8628 matrix that
357 <data format="txt" name="Exo6" label="--> SBS 24 Sig. Mut. EXOME"> 220 extends the ID-83 classification is generated.
358 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter> 221 SigProfiler examines transcriptional strand bias for single base
359 </data> 222 substitutions, doublet base substitutions, and small indels. It
360 223 is evaluated whether a mutation occurs on the transcribed or the
361 <data format="txt" name="Exo7" label="--> SBS 96 Sig. Mut. EXOME"> 224 non-transcribed strand of well-annotated protein coding genes of
362 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter> 225 a reference genome. Mutations found in the transcribed regions
363 </data> 226 of the genome are further subclassified as: (i) transcribed,
364 227 (ii) un-transcribed, (iii) bi-directional, or (iv) unknown.
365 <data format="txt" name="Exo8" label="--> SBS 384 Sig. Mut. EXOME"> 228
366 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter> 229 2. Generation of plots of all types of mutational signatures as
367 </data> 230 well as all types of mutational patterns in cancer genomes.
368 231
369 <data format="txt" name="Exo9" label="--> SBS 1536 Sig. Mut. EXOME"> 232 Additional Information:
370 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter> 233
371 </data> 234 Classification of Single Base substitutions (SBSs):
372 235 Single base substitutions (SBSs) are single DNA base-pairs
373 <data format="txt" name="Exo10" label="--> SBS 6144 Sig. Mut. EXOME"> 236 substituted with another single DNA base-pair. The most
374 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter> 237 basic classification catalogues SBSs into six distinct
375 </data> 238 categories, including: C:G > A:T, C:G > G:C, C:G > T:A,
376 239 T:A > A:T, T:A > C:G, and T:A > G:C. In practice, a C:G > A:T
377 <data format="txt" name="sigRes" label="--> TSB: Significant Results"> 240 substitution is denoted as either a C > A mutation using the
378 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['tsb_stat'] is True</filter> 241 pyrimidine base or as a G > T mutation using the purine base.
379 </data> 242 In consequence, the most commonly used SBS-6 classification of
380 243 single base substitutions can be written as: C > A, C > G,
381 <data format="txt" name="TSB24" label="--> TSB: 24 Sig. Mut."> 244 C > T, T > A, T > C, and T > G.
382 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['tsb_stat'] is True</filter> 245 Additionally, the SBS-6 classification can be further
383 </data> 246 expanded by considering the base-pairs immediately
384 247 adjacent 5′ and 3′ to the somatic mutation. Therefore, an
385 <data format="txt" name="TSB384" label="--> TSB: 96 Sig. Mut."> 248 extended classification for analysis of mutational signatures is
386 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['tsb_stat'] is True</filter> 249 SBS-96, where each of the classes in SBS-6 is further elaborated
387 </data> 250 using one base adjacent at the 5′ of the mutation and one base
388 251 adjacent at the 3′ of the mutation.
389 <data format="txt" name="TSB6144" label="--> TSB: 1536 Sig. Mut."> 252 Logically, SBS-96 can be further elaborated by including
390 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['tsb_stat'] is True</filter> 253 additional 5′ and 3′ adjacent context. Each of the six single
391 </data> 254 base substitutions in SBS-6 has 256 possible pentanucleotides
392 255 resulting in a classification with 1536 possible channels.
393 256
394 <data format="txt" name="SeqInf1" label="--> DBS 78 Sig. Mut. ALL"> 257 Classification of Doublet Base substitutions (DBSs):
395 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter> 258 Doublet base substitutions (DBSs) are somatic mutations in which
396 </data> 259 a set of two adjacent DNA base-pairs is simultaneously
397 260 substituted with another set of two adjacent DNA base-pairs. An
398 <data format="txt" name="SeqInf2" label="--> DBS 186 Sig. Mut. ALL"> 261 example of a DBS is a set of CT:GA base-pairs mutating to a set
399 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter> 262 of AA:TT base-pairs, which is usually denoted as CT:GA > AA:TT.
400 </data> 263 It should be noted that a CT:GA > AA:TT mutation can be
401 264 equivalently written as either a CT > AA or an AG > TT mutation. Overall, the
402 <data format="txt" name="SeqInf3" label="--> DBS 1248 Sig. Mut. ALL"> 265 basic classification catalogues DBSs into 78 distinct categories
403 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter> 266 denoted as the DBS-78 matrix.
404 </data> 267 Similarly, we can expand the characterization of DBS mutations
405 268 by considering the 5′ and 3′ adjacent contexts. With
406 <data format="txt" name="SeqInf4" label="--> DBS 2976 Sig. Mut. ALL"> 269 seventy-eight possible DBS mutations having sixteen possible
407 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter> 270 tetranucleotides each, this context expansion results in 1248
408 </data> 271 possible channels denoted as the DBS-1248 context.
409 272
410 <data format="txt" name="SeqInf5" label="--> SBS 6 Sig. Mut. ALL"> 273 Classification of small insertions and deletions (IDs):
411 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter> 274 A somatic insertion is the incorporation of a set of base-pairs
412 </data> 275 that lengthens a chromosome, while a somatic deletion is the
413 276 removing of a set of existing base-pairs from a given location
414 <data format="txt" name="SeqInf6" label="--> SBS 24 Sig. Mut. ALL"> 277 of a chromosome.
415 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter> 278 Unfortunately, indel classification cannot be performed
416 </data> 279 analogously to SBS or DBS classifications, where the immediate
417 280 sequencing context flanking each mutation was
418 <data format="txt" name="SeqInf7" label="--> SBS 96 Sig. Mut. ALL"> 281 utilized to subclassify these mutational events.
419 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter> 282 Consequently, indels (IDs) are classified as single base-pair
420 </data> 283 or longer events. They can be further subclassified as either a
421 284 C:G or a T:A indel, while longer indels can also be
422 <data format="txt" name="SeqInf8" label="--> SBS 384 Sig. Mut. ALL"> 285 subclassified based on their lengths: 2 bp, 3 bp, 4 bp, and
423 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter> 286 5+ bp.
424 </data> 287
425 288 Incorporation of transcription Strand Bias (TSB):
426 <data format="txt" name="SeqInf9" label="--> SBS 1536 Sig. Mut. ALL"> 289 The mutational classifications described above allow the
427 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter> 290 characterization of mutational patterns of single base
428 </data> 291 substitutions, doublet base substitutions, and small insertions
429 292 and deletions. Nevertheless, these classifications can be
430 <data format="txt" name="SeqInf10" label="--> SBS 6144 Sig. Mut. ALL"> 293 further elaborated by incorporating strand bias. Mutations
431 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter> 294 of the same type are expected to be equally distributed across the two
432 </data> 295 DNA strands. However, in many cases an asymmetric number of mutations is
433 296 observed due to either one of the strands being preferentially
434 297 repaired or one of the strands having a higher propensity for
435 <data format="txt" name="SeqInf11" label="--> ID 28 Sig. Mut. ALL"> 298 being damaged. To sub-classify mutations based on their
436 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter> 299 transcriptional strand bias, the pyrimidine orientation with
437 </data> 300 respect to the locations of well-annotated protein coding genes
438 301 on a genome is considered.
439 <data format="txt" name="SeqInf12" label="--> ID 83 Sig. Mut. ALL"> 302
440 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter> 303 Running SigProfiler:
441 </data> 304
442 305 1. Reference Genomes:
443 <data format="txt" name="SeqInf13" label="--> ID 94 Sig. Mut. ALL"> 306 Before using SigProfiler, the installation of a reference genome
444 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter> 307 is required. By default, the tool supports the following
445 </data> 308 reference genomes:
446 309
447 <data format="txt" name="SeqInf14" label="--> ID 96 Sig. Mut. ALL"> 310 Human: GRCh37 & GRCh38
448 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter> 311
449 </data> 312 Mouse: mm9 & mm10
450 313
451 <data format="txt" name="SeqInf15" label="--> ID 415 Sig. Mut. ALL"> 314 Rat: rn6
452 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter> 315
453 </data> 316 Nematode: c_elegans
454 317
455 <data format="txt" name="SeqInf16" label="--> ID 8628 Sig. Mut. ALL"> 318 A correct command line should look like:
456 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter> 319
457 </data> 320 sigprofiler -ig GRCh37
458 321
459 </outputs> 322 2. Mutational signatures calculation:
460 323
461 <tests> 324 After successful installation of a reference genome, SigProfiler
462 <test> 325 can be applied to files containing somatic mutations in multiple
463 <conditional name="set_analysis"> 326 formats, for transforming these mutational catalogues into mutational
464 <param name="choices" value="install_genome"/> 327 matrices. Specifically, the tool can read data formats such as
465 <param name="refgendwn" ftype="fasta" value="c_elegans"/> 328 Variant Calling Format (VCF) and Mutation Annotation Format
466 </conditional> 329 (MAF) and the following parameters should be provided for
467 <output name="logref" file="c_elegans.log" lines_diff="5"/> 330 generating the diverse matrices and plots:
468 </test> 331
469 332 --name | -n = Project name
470 333 --genome | -g = Reference Genome
471 <test> 334 --files | -f = Absolute path where the input mutation files are located
472 <conditional name="set_analysis"> 335
473 <param name="choices" value="get_sigmut"/> 336 A correct command line should look like:
474 <param name="refgendat" ftype="fasta" value="c_elegans"/> 337
475 <conditional name="vcfile_input"> 338 sigprofiler -n MYPROJECT -g GRCh37 -f /path_to_folder_with_VCF_files/ -p
476 <param name="vcfile" value="icgc"/> 339
477 <param name="icgc_file" ftype="txt" value="test_matrix.txt"/> 340 **Options**
478 </conditional> 341 --version show program's version number and exit
479 <conditional name="bed_input"> 342
480 <param name="bedfile" value="no"/> 343 -h, --help show this help message and exit
481 </conditional> 344
482 <param name="plot" value="True"/> 345 --install_genome Install de novo any of the following reference
483 </conditional> 346 genomes: 'GRCh37', 'GRCh38', 'mm9' or 'mm10'.
484 347
485 <output name="ID_simple" file="ID_simple.pdf" lines_diff="5"/> 348 --name=APPENDIX Provide a project name
486 <output name="ID_TSB" file="ID_TSB.pdf" lines_diff="5"/> 349
487 <output name="ID_83" file="ID_83.pdf" lines_diff="5"/> 350 --genome=NAME Provide a reference genome (ex: GRCh37, GRCh38,
488 351 mm9 or mm10).
489 </test> 352
490 353 --files=Abs_path Path where the input vcf files are located
491 </tests> 354
492 355 --exome Use only the exome or not
493 <help><![CDATA[ 356
494 357 --bed=FILE BED file containing the set of regions to be used
495 **SigProfiler** 358 in generating the matrices
496 359
497 This script configures the SigProfiler analysis pipeline. 360 --chrom Create the matrices on a per chromosome basis
498 You must specify a VCF file for at least one sample. 361
499 362 --plot Generate the plots for each context
500 363
501 **Options** 364 --tsb Performs a transcriptional strand bias test for the
502 --version show program's version number and exit 365 24, 384, and 6144 contexts
503 366
504 -h, --help show this help message and exit 367 --gs Performs a gene strand bias test
505 368
506 --install_genome Install de novo any of the following reference 369 For further info see: https://github.com/AlexandrovLab/SigProfilerMatrixGenerator
507 genomes: 'GRCh37', 'GRCh38', 'mm9' or 'mm10'. 370
508 371 ]]></help>
509 --name=APPENDIX Provide a project name 372
510 373 <citations>
511 --genome=NAME Provide a reference genome (ex: GRCh37, GRCh38, 374 <citation type="doi">10.1186/s12864-019-6041-2</citation>
512 mm9 or mm10). 375 </citations>
513
514 --files=Abs_path Path where the input vcf files are located
515
516 --exome Use only the exome or not
517
518 --bed=FILE BED file containing the set of regions to be used
519 in generating the matrices
520
521 --chrom Create the matrices on a per chromosome basis
522
523 --plot Generate the plots for each context
524
525 --tsb Performs a transcriptional strand bias test for the
526 24, 384, and 6144 contexts
527
528 --gs Performs a gene strand bias test
529
530 For further info see: https://github.com/AlexandrovLab/SigProfilerMatrixGenerator
531
532 ]]></help>
533
534 <citations>
535 <citation type="doi">10.1186/s12864-019-6041-2</citation>
536 </citations>
537 376
538 </tool> 377 </tool>