Mercurial > repos > artbio > sigmut
comparison sigmut.xml @ 1:02861b32a62f draft default tip
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sigmut commit bba3eb3950b8772758cc6f19747172be7413ddd9"
author | artbio |
---|---|
date | Sun, 14 Jun 2020 20:27:29 -0400 |
parents | 9f48c5d97be8 |
children |
comparison
equal
deleted
inserted
replaced
0:9f48c5d97be8 | 1:02861b32a62f |
---|---|
1 <tool id="SigProfiler" name="SigProfiler" version="@VERSION@"> | 1 <tool id="SigProfiler" name="SigProfiler" version="@VERSION@"> |
2 <description>SigProfiler performs the mutational signature characterization from VCF files.</description> | 2 <description>performs mutational signature characterization from variant files</description> |
3 | 3 |
4 <macros> | 4 <macros> |
5 <import>sigmut_macros.xml</import> | 5 <import>sigmut_macros.xml</import> |
6 </macros> | 6 </macros> |
7 <expand macro="requirements"/> | 7 <expand macro="requirements"/> |
8 <expand macro="stdio"/> | 8 <expand macro="stdio"/> |
9 <command detect_errors="exit_code"><![CDATA[ | 9 <command detect_errors="exit_code"><![CDATA[ |
10 @VERSION@ | 10 @VERSION@ |
11 @pipefail@ | 11 @pipefail@ |
12 #import os | 12 BIN=`which sigprofiler | sed 's,/sigprofiler,,g'` && |
13 #import random | 13 echo \$BIN && |
14 #import datetime | 14 chmod -R 777 \$BIN && |
15 #set job_dir=os.getcwd() | 15 mkdir run_dir && |
16 #set run_dir = job_dir + (' ' + str(random.randint(1,100000))).strip() + '/' | 16 #if str( $set_analysis.choices ) == "get_sigmut": |
17 #set job_num = "Job_" + (' ' + str(random.randint(1,500))).strip() | 17 #if str( $set_analysis.vcfile_input.vcfile ) == "maf": |
18 | 18 #set $infile = 'run_dir/snps.maf' |
19 mkdir $run_dir && | |
20 | |
21 #if str( $set_analysis.choices ) == "install_genome": | |
22 ln -s -f '$__tool_directory__/install.log' '${logref}' && | |
23 #else if str( $set_analysis.choices ) == "get_sigmut": | |
24 #set err_file = $run_dir + "logs/SigProfilerMatrixGenerator_" + $job_num + "_" + str($set_analysis.refgendat) + str(datetime.date.today()) + ".err" | |
25 #set log_file = $run_dir + "logs/SigProfilerMatrixGenerator_" + $job_num + "_" + str($set_analysis.refgendat) + str(datetime.date.today()) + ".out" | |
26 | |
27 #set plot1 = $run_dir + "output/plots/SBS_6_plots_" + $job_num + ".pdf" | |
28 #set plot2 = $run_dir + "output/plots/SBS_24_plots_" + $job_num + ".pdf" | |
29 #set plot3 = $run_dir + "output/plots/SBS_78_plots_" + $job_num + ".pdf" | |
30 #set plot4 = $run_dir + "output/plots/SBS_96_plots_" + $job_num + ".pdf" | |
31 #set plot5 = $run_dir + "output/plots/SBS_384_plots_" + $job_num + ".pdf" | |
32 #set plot6 = $run_dir + "output/plots/SBS_1536_plots_" + $job_num + ".pdf" | |
33 #set plot7 = $run_dir + "output/plots/DBS_78_plots_" + $job_num + ".pdf" | |
34 #set plot8 = $run_dir + "output/plots/DBS_186_plots_" + $job_num + ".pdf" | |
35 | |
36 #set plot9 = $run_dir + "output/plots/ID_simple_plots_" + $job_num + ".pdf" | |
37 #set plot10 = $run_dir + "output/plots/ID_TSB_plots_" + $job_num + ".pdf" | |
38 #set plot11 = $run_dir + "output/plots/ID_83_plots_" + $job_num + ".pdf" | |
39 #set plot12 = $run_dir + "output/plots/ID_94_plots_" + $job_num + ".pdf" | |
40 #set plot13 = $run_dir + "output/plots/ID_96_plots_" + $job_num + ".pdf" | |
41 | |
42 #set exo1 = $run_dir + "output/DBS/" + $job_num + ".DBS78" + ".exome" | |
43 #set exo2 = $run_dir + "output/DBS/" + $job_num + ".DBS186" + ".exome" | |
44 #set exo3 = $run_dir + "output/DBS/" + $job_num + ".DBS1248" + ".exome" | |
45 #set exo4 = $run_dir + "output/DBS/" + $job_num + ".DBS2976" + ".exome" | |
46 | |
47 #set exo5 = $run_dir + "output/SBS/" + $job_num + ".SBS6" + ".exome" | |
48 #set exo6 = $run_dir + "output/SBS/" + $job_num + ".SBS24" + ".exome" | |
49 #set exo7 = $run_dir + "output/SBS/" + $job_num + ".SBS96" + ".exome" | |
50 #set exo8 = $run_dir + "output/SBS/" + $job_num + ".SBS384" + ".exome" | |
51 #set exo9 = $run_dir + "output/SBS/" + $job_num + ".SBS1536" + ".exome" | |
52 #set exo10 = $run_dir + "output/SBS/" + $job_num + ".SBS6144" + ".exome" | |
53 | |
54 #set exo11 = $run_dir + "output/vcf_files/DBS/" + $job_num + "_" + "DBS_exome.vcf" | |
55 #set exo12 = $run_dir + "output/vcf_files/SNV/" + $job_num + "_" + "SNV_exome.vcf" | |
56 | |
57 #set tsb1 = $run_dir + "output/TSB/strandBiasTest_24.txt" | |
58 #set tsb2 = $run_dir + "output/TSB/strandBiasTest_384.txt" | |
59 #set tsb3 = $run_dir + "output/TSB/strandBiasTest_6144.txt" | |
60 #set tsb4 = $run_dir + "output/TSB/significantResults_strandBiasTest.txt" | |
61 | |
62 #set seqinf1 = $run_dir + "output/DBS/" + $job_num + ".DBS78" + ".all" | |
63 #set seqinf2 = $run_dir + "output/DBS/" + $job_num + ".DBS186" + ".all" | |
64 #set seqinf3 = $run_dir + "output/DBS/" + $job_num + ".DBS1248" + ".all" | |
65 #set seqinf4 = $run_dir + "output/DBS/" + $job_num + ".DBS2976" + ".all" | |
66 | |
67 #set seqinf5 = $run_dir + "output/SBS/" + $job_num + ".SBS6" + ".all" | |
68 #set seqinf6 = $run_dir + "output/SBS/" + $job_num + ".SBS24" + ".all" | |
69 #set seqinf7 = $run_dir + "output/SBS/" + $job_num + ".SBS96" + ".all" | |
70 #set seqinf8 = $run_dir + "output/SBS/" + $job_num + ".SBS384" + ".all" | |
71 #set seqinf9 = $run_dir + "output/SBS/" + $job_num + ".SBS1536" + ".all" | |
72 #set seqinf10 = $run_dir + "output/SBS/" + $job_num + ".SBS6144" + ".all" | |
73 | |
74 #set seqinf11 = $run_dir + "output/ID/" + $job_num + ".ID28" + ".all" | |
75 #set seqinf12 = $run_dir + "output/ID/" + $job_num + ".ID83" + ".all" | |
76 #set seqinf13 = $run_dir + "output/ID/" + $job_num + ".ID94" + ".all" | |
77 #set seqinf14 = $run_dir + "output/ID/" + $job_num + ".ID96" + ".all" | |
78 #set seqinf15 = $run_dir + "output/ID/" + $job_num + ".ID415" + ".all" | |
79 #set seqinf16 = $run_dir + "output/ID/" + $job_num + ".ID8628" + ".all" | |
80 | |
81 ln -s -f '$log_file' '${logsmt}' && | |
82 | |
83 #if str($set_analysis.plot) == "true": | |
84 ln -s -f '$plot1' '${SBS6}' && | |
85 ln -s -f '$plot2' '${SBS24}' && | |
86 ln -s -f '$plot3' '${SBS78}' && | |
87 ln -s -f '$plot4' '${SBS96}' && | |
88 ln -s -f '$plot5' '${SBS384}' && | |
89 ln -s -f '$plot6' '${SBS1536}' && | |
90 ln -s -f '$plot7' '${DBS78}' && | |
91 ln -s -f '$plot8' '${DBS186}' && | |
92 ln -s -f '$plot9' '${ID_simple}' && | |
93 ln -s -f '$plot10' '${ID_TSB}' && | |
94 ln -s -f '$plot11' '${ID_83}' && | |
95 ln -s -f '$plot12' '${ID_94}' && | |
96 ln -s -f '$plot13' '${ID_96}' && | |
97 | |
98 ln -s -f '$seqinf1' '${SeqInf1}' && | |
99 ln -s -f '$seqinf2' '${SeqInf2}' && | |
100 ln -s -f '$seqinf3' '${SeqInf3}' && | |
101 ln -s -f '$seqinf4' '${SeqInf4}' && | |
102 ln -s -f '$seqinf5' '${SeqInf5}' && | |
103 ln -s -f '$seqinf6' '${SeqInf6}' && | |
104 ln -s -f '$seqinf7' '${SeqInf7}' && | |
105 ln -s -f '$seqinf8' '${SeqInf8}' && | |
106 ln -s -f '$seqinf9' '${SeqInf9}' && | |
107 ln -s -f '$seqinf10' '${SeqInf10}' && | |
108 | |
109 ln -s -f '$seqinf11' '${SeqInf11}' && | |
110 ln -s -f '$seqinf12' '${SeqInf12}' && | |
111 ln -s -f '$seqinf13' '${SeqInf13}' && | |
112 ln -s -f '$seqinf14' '${SeqInf14}' && | |
113 ln -s -f '$seqinf15' '${SeqInf15}' && | |
114 ln -s -f '$seqinf16' '${SeqInf16}' && | |
115 | |
116 #end if | |
117 | |
118 #if str($set_analysis.exome) == "true": | |
119 ln -s -f '$exo1' '${Exo1}' && | |
120 ln -s -f '$exo2' '${Exo2}' && | |
121 ln -s -f '$exo3' '${Exo3}' && | |
122 ln -s -f '$exo4' '${Exo4}' && | |
123 ln -s -f '$exo5' '${Exo5}' && | |
124 ln -s -f '$exo6' '${Exo6}' && | |
125 ln -s -f '$exo7' '${Exo7}' && | |
126 ln -s -f '$exo8' '${Exo8}' && | |
127 ln -s -f '$exo9' '${Exo9}' && | |
128 ln -s -f '$exo10' '${Exo10}' && | |
129 ln -s -f '$exo11' '${Exo11}' && | |
130 ln -s -f '$exo12' '${Exo12}' && | |
131 #end if | |
132 | |
133 #if str($set_analysis.tsb_stat) == "true": | |
134 ln -s -f '$tsb1' '${TSB24}' && | |
135 ln -s -f '$tsb2' '${TSB384}' && | |
136 ln -s -f '$tsb3' '${TSB6144}' && | |
137 ln -s -f '$tsb4' '${sigRes}' && | |
138 #end if | |
139 | |
140 | |
141 #if str($set_analysis.seqInfo) == "true": | |
142 ln -s -f '$seqinf1' '${SeqInf1}' && | |
143 ln -s -f '$seqinf2' '${SeqInf2}' && | |
144 ln -s -f '$seqinf3' '${SeqInf3}' && | |
145 ln -s -f '$seqinf4' '${SeqInf4}' && | |
146 ln -s -f '$seqinf5' '${SeqInf5}' && | |
147 ln -s -f '$seqinf6' '${SeqInf6}' && | |
148 ln -s -f '$seqinf7' '${SeqInf7}' && | |
149 ln -s -f '$seqinf8' '${SeqInf8}' && | |
150 ln -s -f '$seqinf9' '${SeqInf9}' && | |
151 ln -s -f '$seqinf10' '${SeqInf10}' && | |
152 ln -s -f '$seqinf11' '${SeqInf11}' && | |
153 ln -s -f '$seqinf12' '${SeqInf12}' && | |
154 ln -s -f '$seqinf13' '${SeqInf13}' && | |
155 ln -s -f '$seqinf14' '${SeqInf14}' && | |
156 ln -s -f '$seqinf15' '${SeqInf15}' && | |
157 ln -s -f '$seqinf16' '${SeqInf16}' && | |
158 #end if | |
159 | |
160 | |
161 #if str( $set_analysis.vcfile_input.vcfile ) == "maf": | |
162 #set $infile = $run_dir + 'snps.maf' | |
163 ln -s -f '$set_analysis.vcfile_input.maf_file' '$infile' && | 19 ln -s -f '$set_analysis.vcfile_input.maf_file' '$infile' && |
164 #else if str( $set_analysis.vcfile_input.vcfile ) == "icgc": | 20 #else if str( $set_analysis.vcfile_input.vcfile ) == "icgc": |
165 #set $infile = $run_dir + 'snps.txt' | 21 #set $infile = 'run_dir/snps.txt' |
166 ln -s -f '$set_analysis.vcfile_input.icgc_file' '$infile' && | 22 ln -s -f '$set_analysis.vcfile_input.icgc_file' '$infile' && |
167 #else if str( $set_analysis.vcfile_input.vcfile ) == "vcf": | 23 #else if str( $set_analysis.vcfile_input.vcfile ) == "vcf": |
168 #set $infile = $run_dir + 'snps.vcf' | 24 #set $infile = 'run_dir/snps.vcf' |
169 ln -s -f '$set_analysis.vcfile_input.vcf_file' '$infile' && | 25 ln -s -f '$set_analysis.vcfile_input.vcf_file' '$infile' && |
170 #end if | 26 #end if |
171 | 27 #end if |
172 #end if | 28 |
173 | 29 sigprofiler |
174 sigprofiler | 30 |
175 #if str( $set_analysis.choices ) == "install_genome": | 31 #if str( $set_analysis.choices ) == "install_genome": |
176 -ig $set_analysis.refgendwn | 32 -ig $set_analysis.refgendwn > install.log |
177 #else if str( $set_analysis.choices ) == "get_sigmut": | 33 #else if str( $set_analysis.choices ) == "get_sigmut": |
178 -n $job_num | 34 -g $set_analysis.refgendat |
179 -g $set_analysis.refgendat | 35 -f 'run_dir' |
180 -f $run_dir | 36 -n "project" |
181 | 37 -p |
182 #if str( $set_analysis.exome ) == "true": | 38 ## ! implement exome functionality when good test available |
183 -e | 39 ## #if str( $set_analysis.exome ) == "true": |
184 #end if | 40 ## -e |
185 #if str( $set_analysis.chrom_based ) == "true": | 41 ## #end if |
186 -c | 42 ## ! implement per chromosome functionality when good test available |
187 #end if | 43 ## #if str( $set_analysis.chrom_based ) == "true": |
188 #if str( $set_analysis.plot ) == "true": | 44 ## -c |
189 -p | 45 ## #end if |
190 #end if | 46 #if str( $set_analysis.tsb_stat ) == "true": |
191 #if str( $set_analysis.tsb_stat ) == "true": | 47 -t |
192 -t | 48 #end if |
193 #end if | 49 #if str( $set_analysis.gs ) == "true": |
194 #if str( $set_analysis.gs ) == "true": | 50 -s |
195 -s | 51 #end if |
196 #end if | 52 ##-b $set_analysis.bed ### to be done |
197 ##-b $set_analysis.bed | 53 && pdfcombine -f -s -o blinder.pdf run_dir/output/plots/*.pdf |
198 #end if | 54 && ls run_dir/logs/ |
199 ]]></command> | 55 #if str( $set_analysis.tsb_stat ) == "true": |
200 | 56 && tail -n +1 run_dir/output/TSB/*.txt > transcriptional_strand_biases.txt |
201 <inputs> | 57 #end if |
202 <conditional name="set_analysis"> | 58 #if $set_analysis.seqInfo: |
203 <param name="choices" type="select" label="Which of the following jobs do you want perform?"> | 59 && tail -n +1 run_dir/output/*/*.all > information.txt |
204 <option value="install_genome">Install 'de novo' a reference genome </option> | 60 #end if |
205 <option value="get_sigmut">Obtain the mutational signatures from VCF files</option> | 61 #end if |
206 </param> | 62 ]]></command> |
207 <when value="install_genome"> | 63 |
208 <param name="refgendwn" type="select" label="Reference genome" help="Get data from any of the following reference genomes:"> | 64 <inputs> |
209 <option value="GRCh37">Homo sapiens, GRCh37.p13 [GCA_000001405.14] </option> | 65 <conditional name="set_analysis"> |
210 <option value="GRCh38">Homo sapiens, GRCh38.p12 [GCA_000001405.27] </option> | 66 <param name="choices" type="select" label="Which of the following jobs do you want perform?"> |
211 <option value="mm9">Mus musculus, GRCm37 [GCA_000001635.18]</option> | 67 <option value="install_genome">Install 'de novo' a reference genome </option> |
212 <option value="mm10">Mus musculus, GRCm38.p6 [GCA_000001635.8]</option> | 68 <option value="get_sigmut">Obtain the mutational signatures from VCF files</option> |
213 <option value="rn6">Rattus norvegicus, Rnor_6.0 [GCA_000001895.4]</option> | 69 </param> |
214 <option value="c_elegans">Caenorhabditis elegans</option> | 70 <when value="install_genome"> |
215 <option value="dog">Dog</option> | 71 <param name="refgendwn" type="select" label="Reference genome" help="Get data from any of the following reference genomes:"> |
216 </param> | 72 <option value="GRCh37">Homo sapiens, GRCh37.p13 [GCA_000001405.14] </option> |
217 </when> | 73 <option value="GRCh38">Homo sapiens, GRCh38.p12 [GCA_000001405.27] </option> |
218 | 74 <option value="mm9">Mus musculus, GRCm37 [GCA_000001635.18]</option> |
219 <when value="get_sigmut"> | 75 <option value="mm10">Mus musculus, GRCm38.p6 [GCA_000001635.8]</option> |
220 <conditional name="vcfile_input"> | 76 <option value="rn6">Rattus norvegicus, Rnor_6.0 [GCA_000001895.4]</option> |
221 <param name="vcfile" type="select" label="VC file" help="Select the format of your input data"> | 77 <option value="c_elegans">Caenorhabditis elegans</option> |
222 <option value="maf">Mutation Annotation Format</option> | 78 <option value="dog">Dog</option> |
223 <option value="icgc">Tab-separated file</option> | 79 </param> |
224 <option value="vcf">Variant Call Format</option> | 80 </when> |
225 </param> | 81 |
226 <when value='maf'> | 82 <when value="get_sigmut"> |
227 <param name="maf_file" type="data" format="maf" label="select VC file" help="Select the input file in MAF format." /> | 83 <conditional name="vcfile_input"> |
228 </when> | 84 <param name="vcfile" type="select" label="VC file" help="Select the format of your input data"> |
229 <when value='icgc'> | 85 <option value="maf">Mutation Annotation Format</option> |
230 <param name="icgc_file" type="data" format="txt" label="select VC file" help="Select the input file in ICGC format." /> | 86 <option value="icgc">Tab-separated file</option> |
231 </when> | 87 <option value="vcf">Variant Call Format</option> |
232 <when value='vcf'> | 88 </param> |
233 <param name="vcf_file" type="data" format="vcf" label="select VC file" help="Select the input file in VCF format." /> | 89 <when value='maf'> |
234 </when> | 90 <param name="maf_file" type="data" format="maf" label="select VC file" help="Select the input file in MAF format." /> |
235 </conditional> | 91 </when> |
236 | 92 <when value='icgc'> |
237 <param name="refgendat" type="select" label="Reference genome to be analyzed" help="Use the following reference genome:"> | 93 <param name="icgc_file" type="data" format="txt" label="select VC file" help="Select the input file in ICGC format." /> |
238 <option value="GRCh37">Homo sapiens, GRCh37.p13 [GCA_000001405.14] </option> | 94 </when> |
95 <when value='vcf'> | |
96 <param name="vcf_file" type="data" format="vcf" label="select VC file" help="Select the input file in VCF format." /> | |
97 </when> | |
98 </conditional> | |
99 | |
100 <param name="refgendat" type="select" label="Reference genome to be analyzed" help="Use the following reference genome:"> | |
101 <option value="GRCh37">Homo sapiens, GRCh37.p13 [GCA_000001405.14] </option> | |
239 <option value="GRCh38">Homo sapiens, GRCh38.p12 [GCA_000001405.27] </option> | 102 <option value="GRCh38">Homo sapiens, GRCh38.p12 [GCA_000001405.27] </option> |
240 <option value="mm9">Mus musculus, GRCm37 [GCA_000001635.18]</option> | 103 <option value="mm9">Mus musculus, GRCm37 [GCA_000001635.18]</option> |
241 <option value="mm10">Mus musculus, GRCm38.p6 [GCA_000001635.8]</option> | 104 <option value="mm10">Mus musculus, GRCm38.p6 [GCA_000001635.8]</option> |
242 <option value="rn6">Rattus norvegicus, Rnor_6.0 [GCA_000001895.4]</option> | 105 <option value="rn6">Rattus norvegicus, Rnor_6.0 [GCA_000001895.4]</option> |
243 <option value="c_elegans">Caenorhabditis elegans</option> | 106 <option value="c_elegans">Caenorhabditis elegans</option> |
244 <option value="dog">Dog</option> | 107 <option value="dog">Dog</option> |
245 </param> | 108 </param> |
246 | 109 |
247 <conditional name="bed_input"> | 110 <!-- implement bed when test available --> |
248 <param name="bedfile" type="select" label="BED file" help="Input a BED file"> | 111 <!-- <conditional name="bed_input"> |
249 <option value="yes">Yes</option> | 112 <param name="bedfile" type="select" label="BED file" help="Input a BED file"> |
250 <option value="no" selected="true">No</option> | 113 <option value="yes">Yes</option> |
251 </param> | 114 <option value="no" selected="true">No</option> |
252 <when value='yes'> | 115 </param> |
253 <param name="bed_file" format="bed" type="data" label="Use a BED file containing the set of regions" help="Provide a BED file"/> | 116 <when value='yes'> |
254 </when> | 117 <param name="bed_file" format="bed" type="data" label="Use a BED file containing the set of regions" help="Provide a BED file"/> |
255 <when value='no'> | 118 </when> |
256 </when> | 119 <when value='no'> |
257 </conditional> | 120 </when> |
258 <param name="plot" type="boolean" truevalue="true" label="Produce plot results?" checked="False" help="Show plots"/> | 121 </conditional> --> |
259 <param name="exome" type="boolean" label="Use only the exome?" checked="False" help="Use exome"/> | 122 <!-- implement exome functionality when test available --> |
260 <param name="chrom_based" type="boolean" label="Create the matrices on a per chromosome basis?" checked="False" help="Show snvs"/> | 123 <!-- <param name="exome" type="boolean" label="Use only the exome?" checked="False" help="Use exome"/> --> |
261 <param name="tsb_stat" type="boolean" truevalue="true" label="Performs a transcriptional strand bias test?" checked="False" help="Show snvs"/> | 124 <!-- implement chrom_based functionality when test available --> |
262 <param name="seqInfo" type="boolean" truevalue="true" label="Export sequence information?" checked="False" help="Show sequence information"/> | 125 <!--<param name="chrom_based" type="boolean" label="Create the matrices on a per chromosome basis?" checked="False" help="Show snvs"/> --> |
263 <param name="gs" type="boolean" label="Performs gene strand bias test?" checked="False" help="Show snvs"/> | 126 <param name="tsb_stat" type="boolean" truevalue="true" label="Performs a transcriptional strand bias test?" checked="False" help="Show snvs"/> |
264 </when> | 127 <param name="seqInfo" type="boolean" truevalue="true" label="Export sequence information?" checked="False" help="Show sequence information"/> |
265 </conditional> | 128 <param name="gs" type="boolean" label="Performs gene strand bias test?" checked="False" help="Show snvs"/> |
266 </inputs> | 129 </when> |
267 | 130 </conditional> |
268 <outputs> | 131 </inputs> |
269 <data format="txt" name="logref" label="Log file: Install a Reference Genome"> | 132 |
270 <filter>set_analysis['choices'] == 'install_genome'</filter> | 133 <outputs> |
271 </data> | 134 <data format="txt" name="logref" label="Log file: Install a Reference Genome" |
272 | 135 from_work_dir="./install.log"> |
273 <data format="txt" name="logsmt" label="Log file: Calculate Mutational Signatures"> | 136 <filter>set_analysis['choices'] == 'install_genome'</filter> |
274 <filter>set_analysis['choices'] == 'get_sigmut'</filter> | 137 </data> |
275 </data> | 138 <data format="txt" name="logsmt" label="Log file: Calculate Mutational Signatures" |
276 | 139 from_work_dir="run_dir/logs/SigProfilerMatrixGenerator*.out"> |
277 <data format="pdf" name="SBS6" label="--> Plot SBS 6 Sig. Mut."> | 140 <filter>set_analysis['choices'] == 'get_sigmut'</filter> |
278 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['plot'] is True</filter> | 141 </data> |
279 </data> | 142 |
280 | 143 <data format="pdf" name="blinder" label="SBS Mutational Signatures plots (pdf)" |
281 <data format="pdf" name="SBS24" label="--> Plot SBS 24 Sig. Mut."> | 144 from_work_dir="./blinder.pdf" > |
282 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['plot'] is True</filter> | 145 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['plot'] is True</filter> |
283 </data> | 146 </data> |
284 | 147 |
285 <data format="pdf" name="SBS78" label="--> Plot SBS 78 Sig. Mut."> | 148 <!-- implement exome outputs when test available --> |
286 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['plot'] is True</filter> | 149 <!-- |
287 </data> | 150 <data format="txt" name="dbs_exome" label="DBS_exome.vcf"> |
288 | 151 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter> |
289 <data format="pdf" name="SBS96" label="--> Plot SBS 96 Sig. Mut."> | 152 </data> |
290 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['plot'] is True</filter> | 153 <data format="txt" name="snv_exome" label="SNV_exome.vcf"> |
291 </data> | 154 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter> |
292 | 155 </data> |
293 <data format="pdf" name="SBS384" label="--> Plot SBS 384 Sig. Mut."> | 156 |
294 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['plot'] is True</filter> | 157 <data format="txt" name="sig_exome" label="DBS 78 and so on Sig. Mut. EXOME"> |
295 </data> | 158 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter> |
296 | 159 </data> |
297 <data format="pdf" name="SBS1536" label="--> Plot SBS 1536 Sig. Mut."> | 160 --> |
298 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['plot'] is True</filter> | 161 <data format="txt" name="tsb" label="Transcriptional Strand Biases" |
299 </data> | 162 from_work_dir="./transcriptional_strand_biases.txt" > |
300 | 163 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['tsb_stat'] is True</filter> |
301 <data format="pdf" name="DBS78" label="--> Plot DBS 78 Sig. Mut."> | 164 </data> |
302 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['plot'] is True</filter> | 165 |
303 </data> | 166 <data format="txt" name="seqinfo" label="Mutational Signature detailed infos" |
304 | 167 from_work_dir="./information.txt" > |
305 <data format="pdf" name="DBS186" label="--> Plot DBS 186 Sig. Mut."> | 168 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter> |
306 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['plot'] is True</filter> | 169 </data> |
307 </data> | 170 |
308 | 171 </outputs> |
309 <data format="pdf" name="ID_simple" label="--> Plot ID simple Sig. Mut."> | 172 <tests> |
310 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['plot'] is True</filter> | 173 <test> |
311 </data> | 174 <param name="choices" value="install_genome"/> |
312 | 175 <param name="refgendwn" value="GRCh38"/> |
313 <data format="pdf" name="ID_TSB" label="--> Plot ID TSB Sig. Mut."> | 176 <output name="logref" file="hg38_install.log" lines_diff="5"/> |
314 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['plot'] is True</filter> | 177 </test> |
315 </data> | 178 <test> |
316 | 179 <param name="choices" value="get_sigmut"/> |
317 <data format="pdf" name="ID_83" label="--> Plot ID 83 Sig. Mut."> | 180 <param name="refgendat" value="GRCh38"/> |
318 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['plot'] is True</filter> | 181 <param name="vcfile" value="vcf"/> |
319 </data> | 182 <param name="vcf_file" ftype="vcf" value="hg38.vcf"/> |
320 | 183 <param name="plot" value="True"/> |
321 <data format="pdf" name="ID_94" label="--> Plot ID 94 Sig. Mut."> | 184 <output name="logsmt" ftype="txt" file="sigmut.log" lines_diff="5" /> |
322 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['plot'] is True</filter> | 185 <output name="blinder" file="hg38_blinder.pdf" lines_diff="5" /> |
323 </data> | 186 </test> |
324 | 187 </tests> |
325 <data format="pdf" name="ID_96" label="--> Plot ID 96 Sig. Mut."> | 188 |
326 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['plot'] is True</filter> | 189 <help><![CDATA[ |
327 </data> | 190 |
328 | 191 **SigProfiler** |
329 <data format="txt" name="Exo11" label="--> DBS_exome.vcf"> | 192 |
330 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter> | 193 Background: |
331 </data> | 194 |
332 | 195 Cancer genomes evince somatic mutations, which are imprinted by |
333 <data format="txt" name="Exo12" label="--> SNV_exome.vcf"> | 196 different mutational processes, that give rise to diverse |
334 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter> | 197 mutational signatures. Their analysis from single base |
335 </data> | 198 substitutions and their immediate sequencing context, allows the |
336 | 199 classification of small mutational events (including |
337 <data format="txt" name="Exo1" label="--> DBS 78 Sig. Mut. EXOME"> | 200 substitutions, insertions, deletions, and doublet substitutions) |
338 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter> | 201 for better understanding the mutational processes that have |
339 </data> | 202 shaped a cancer genome. |
340 | 203 |
341 <data format="txt" name="Exo2" label="--> DBS 186 Sig. Mut. EXOME"> | 204 In this sense, SigProfiler constitutes a Galaxy-based wrapper of |
342 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter> | 205 a computational method developed by Ludmil B. Alexandrov, that |
343 </data> | 206 allows the exploration and visualization of mutational patterns |
344 | 207 for all types of small mutational events. Specifically, the |
345 <data format="txt" name="Exo3" label="--> DBS 1248 Sig. Mut. EXOME"> | 208 following actions can be performed using SigProfiler wrapper: |
346 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter> | 209 |
347 </data> | 210 1. Identify and categorize the mutations based on possible |
348 | 211 single nucleotide variants (SNVs), double base substitutions |
349 <data format="txt" name="Exo4" label="--> DBS 2976 Sig. Mut. EXOME"> | 212 (DBS), and insertions/deletions and provide further |
350 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter> | 213 transcriptional strand bias categorization. Afterwards, the |
351 </data> | 214 classification of these mutations is integrated into distinct |
352 | 215 matrices. |
353 <data format="txt" name="Exo5" label="--> SBS 6 Sig. Mut. EXOME"> | 216 SigProfiler provides matrix generation support for SBS-6, |
354 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter> | 217 SBS-96, SBS-1536, DBS-78 and DBS-1248. In addition, the |
355 </data> | 218 generation of mutational matrices of indels including |
356 | 219 ID-28 and ID-83 is supported. Besides, an ID-8628 matrix that |
357 <data format="txt" name="Exo6" label="--> SBS 24 Sig. Mut. EXOME"> | 220 extends the ID-83 classification is generated. |
358 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter> | 221 SigProfiler examines transcriptional strand bias for single base |
359 </data> | 222 substitutions, doublet base substitutions, and small indels. It |
360 | 223 is evaluated whether a mutation occurs on the transcribed or the |
361 <data format="txt" name="Exo7" label="--> SBS 96 Sig. Mut. EXOME"> | 224 non-transcribed strand of well-annotated protein coding genes of |
362 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter> | 225 a reference genome. Mutations found in the transcribed regions |
363 </data> | 226 of the genome are further subclassified as: (i) transcribed, |
364 | 227 (ii) un-transcribed, (iii) bi-directional, or (iv) unknown. |
365 <data format="txt" name="Exo8" label="--> SBS 384 Sig. Mut. EXOME"> | 228 |
366 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter> | 229 2. Generation of plots of all types of mutational signatures as |
367 </data> | 230 well as all types of mutational patterns in cancer genomes. |
368 | 231 |
369 <data format="txt" name="Exo9" label="--> SBS 1536 Sig. Mut. EXOME"> | 232 Additional Information: |
370 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter> | 233 |
371 </data> | 234 Classification of Single Base substitutions (SBSs): |
372 | 235 Single base substitutions (SBSs) are single DNA base-pairs |
373 <data format="txt" name="Exo10" label="--> SBS 6144 Sig. Mut. EXOME"> | 236 substituted with another single DNA base-pair. The most |
374 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter> | 237 basic classification catalogues SBSs into six distinct |
375 </data> | 238 categories, including: C:G > A:T, C:G > G:C, C:G > T:A, |
376 | 239 T:A > A:T, T:A > C:G, and T:A > G:C. In practice, a C:G > A:T |
377 <data format="txt" name="sigRes" label="--> TSB: Significant Results"> | 240 substitution is denoted as either a C > A mutation using the |
378 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['tsb_stat'] is True</filter> | 241 pyrimidine base or as a G > T mutation using the purine base. |
379 </data> | 242 In consequence, the most commonly used SBS-6 classification of |
380 | 243 single base substitutions can be written as: C > A, C > G, |
381 <data format="txt" name="TSB24" label="--> TSB: 24 Sig. Mut."> | 244 C > T, T > A, T > C, and T > G. |
382 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['tsb_stat'] is True</filter> | 245 Additionally, the SBS-6 classification can be further |
383 </data> | 246 expanded by considering the base-pairs immediately |
384 | 247 adjacent 5′ and 3′ to the somatic mutation. Therefore, an |
385 <data format="txt" name="TSB384" label="--> TSB: 96 Sig. Mut."> | 248 extended classification for analysis of mutational signatures is |
386 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['tsb_stat'] is True</filter> | 249 SBS-96, where each of the classes in SBS-6 is further elaborated |
387 </data> | 250 using one base adjacent at the 5′ of the mutation and one base |
388 | 251 adjacent at the 3′ of the mutation. |
389 <data format="txt" name="TSB6144" label="--> TSB: 1536 Sig. Mut."> | 252 Logically, SBS-96 can be further elaborated by including |
390 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['tsb_stat'] is True</filter> | 253 additional 5′ and 3′ adjacent context. Each of the six single |
391 </data> | 254 base substitutions in SBS-6 has 256 possible pentanucleotides |
392 | 255 resulting in a classification with 1536 possible channels. |
393 | 256 |
394 <data format="txt" name="SeqInf1" label="--> DBS 78 Sig. Mut. ALL"> | 257 Classification of Doublet Base substitutions (DBSs): |
395 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter> | 258 Doublet base substitutions (DBSs) are somatic mutations in which |
396 </data> | 259 a set of two adjacent DNA base-pairs is simultaneously |
397 | 260 substituted with another set of two adjacent DNA base-pairs. An |
398 <data format="txt" name="SeqInf2" label="--> DBS 186 Sig. Mut. ALL"> | 261 example of a DBS is a set of CT:GA base-pairs mutating to a set |
399 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter> | 262 of AA:TT base-pairs, which is usually denoted as CT:GA > AA:TT. |
400 </data> | 263 It should be noted that a CT:GA > AA:TT mutation can be |
401 | 264 equivalently written as either a CT > AA or an AG > TT mutation. Overall, the |
402 <data format="txt" name="SeqInf3" label="--> DBS 1248 Sig. Mut. ALL"> | 265 basic classification catalogues DBSs into 78 distinct categories |
403 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter> | 266 denoted as the DBS-78 matrix. |
404 </data> | 267 Similarly, we can expand the characterization of DBS mutations |
405 | 268 by considering the 5′ and 3′ adjacent contexts. With |
406 <data format="txt" name="SeqInf4" label="--> DBS 2976 Sig. Mut. ALL"> | 269 seventy-eight possible DBS mutations having sixteen possible |
407 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter> | 270 tetranucleotides each, this context expansion results in 1248 |
408 </data> | 271 possible channels denoted as the DBS-1248 context. |
409 | 272 |
410 <data format="txt" name="SeqInf5" label="--> SBS 6 Sig. Mut. ALL"> | 273 Classification of small insertions and deletions (IDs): |
411 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter> | 274 A somatic insertion is the incorporation of a set of base-pairs |
412 </data> | 275 that lengthens a chromosome, while a somatic deletion is the |
413 | 276 removal of a set of existing base-pairs from a given location |
414 <data format="txt" name="SeqInf6" label="--> SBS 24 Sig. Mut. ALL"> | 277 of a chromosome. |
415 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter> | 278 Unfortunately, indel classification cannot be performed |
416 </data> | 279 analogously to SBS or DBS classifications, where the immediate |
417 | 280 sequencing context flanking each mutation was |
418 <data format="txt" name="SeqInf7" label="--> SBS 96 Sig. Mut. ALL"> | 281 utilized to subclassify these mutational events. |
419 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter> | 282 Consequently, indels (IDs) are classified as single base-pair |
420 </data> | 283 or longer events. They can be further subclassified as either a |
421 | 284 C:G or a T:A indel, while longer indels can also be |
422 <data format="txt" name="SeqInf8" label="--> SBS 384 Sig. Mut. ALL"> | 285 subclassified based on their lengths: 2 bp, 3 bp, 4 bp, and |
423 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter> | 286 5 + bp. |
424 </data> | 287 |
425 | 288 Incorporation of transcription Strand Bias (TSB): |
426 <data format="txt" name="SeqInf9" label="--> SBS 1536 Sig. Mut. ALL"> | 289 The mutational classifications described above allow the |
427 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter> | 290 characterization of mutational patterns of single base |
428 </data> | 291 substitutions, doublet base substitutions, and small insertions |
429 | 292 and deletions. Nevertheless, these classifications can be |
430 <data format="txt" name="SeqInf10" label="--> SBS 6144 Sig. Mut. ALL"> | 293 further elaborated by incorporating strand bias. Mutations |
431 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter> | 294 of the same type are expected to be equally distributed across the two |
432 </data> | 295 DNA strands. However, in many cases an asymmetric number of mutations is |
433 | 296 observed due to either one of the strands being preferentially |
434 | 297 repaired or one of the strands having a higher propensity for |
435 <data format="txt" name="SeqInf11" label="--> ID 28 Sig. Mut. ALL"> | 298 being damaged. To sub-classify mutations based on their |
436 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter> | 299 transcriptional strand bias, the pyrimidine orientation with |
437 </data> | 300 respect to the locations of well-annotated protein coding genes |
438 | 301 on a genome is considered. |
439 <data format="txt" name="SeqInf12" label="--> ID 83 Sig. Mut. ALL"> | 302 |
440 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter> | 303 Running SigProfiler: |
441 </data> | 304 |
442 | 305 1. Reference Genomes: |
443 <data format="txt" name="SeqInf13" label="--> ID 94 Sig. Mut. ALL"> | 306 Before using SigProfiler, the installation of a reference genome |
444 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter> | 307 is required. By default, the tool supports the following |
445 </data> | 308 reference genomes: |
446 | 309 |
447 <data format="txt" name="SeqInf14" label="--> ID 96 Sig. Mut. ALL"> | 310 Human: GRCh37 & GRCh38 |
448 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter> | 311 |
449 </data> | 312 Mouse: mm9 & mm10 |
450 | 313 |
451 <data format="txt" name="SeqInf15" label="--> ID 415 Sig. Mut. ALL"> | 314 Rat: rn6 |
452 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter> | 315 |
453 </data> | 316 Nematode: c_elegans |
454 | 317 |
455 <data format="txt" name="SeqInf16" label="--> ID 8628 Sig. Mut. ALL"> | 318 A correct command line should look like: |
456 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter> | 319 |
457 </data> | 320 sigprofiler -ig GRCh37 |
458 | 321 |
459 </outputs> | 322 2. Mutational signatures calculation: |
460 | 323 |
461 <tests> | 324 After successful installation of a reference genome, SigProfiler |
462 <test> | 325 can be applied to files containing somatic mutations in multiple |
463 <conditional name="set_analysis"> | 326 formats, for transforming these mutational catalogues into mutational |
464 <param name="choices" value="install_genome"/> | 327 matrices. Specifically, the tool can read data formats such as |
465 <param name="refgendwn" ftype="fasta" value="c_elegans"/> | 328 Variant Calling Format (VCF) and Mutation Annotation Format |
466 </conditional> | 329 (MAF) and the following parameters should be provided for |
467 <output name="logref" file="c_elegans.log" lines_diff="5"/> | 330 generating the diverse matrices and plots: |
468 </test> | 331 |
469 | 332 --name | -n = Project name |
470 | 333 --genome | -g = Reference Genome |
471 <test> | 334 --files | -f = Absolute path where the input mutation files are located |
472 <conditional name="set_analysis"> | 335 |
473 <param name="choices" value="get_sigmut"/> | 336 A correct command line should look like: |
474 <param name="refgendat" ftype="fasta" value="c_elegans"/> | 337 |
475 <conditional name="vcfile_input"> | 338 sigprofiler -n MYPROJECT -g GRCh37 -f /path_to_folder_with_VCF_files/ -p |
476 <param name="vcfile" value="icgc"/> | 339 |
477 <param name="icgc_file" ftype="txt" value="test_matrix.txt"/> | 340 **Options** |
478 </conditional> | 341 --version show program's version number and exit |
479 <conditional name="bed_input"> | 342 |
480 <param name="bedfile" value="no"/> | 343 -h, --help show this help message and exit |
481 </conditional> | 344 |
482 <param name="plot" value="True"/> | 345 --install_genome Install de novo any of the following reference |
483 </conditional> | 346 genomes: 'GRCh37', 'GRCh38', 'mm9' or 'mm10'. |
484 | 347 |
485 <output name="ID_simple" file="ID_simple.pdf" lines_diff="5"/> | 348 --name=APPENDIX Provide a project name |
486 <output name="ID_TSB" file="ID_TSB.pdf" lines_diff="5"/> | 349 |
487 <output name="ID_83" file="ID_83.pdf" lines_diff="5"/> | 350 --genome=NAME Provide a reference genome (ex: GRCh37, GRCh38, |
488 | 351 mm9 or mm10). |
489 </test> | 352 |
490 | 353 --files=Abs_path Path where the input vcf files are located |
491 </tests> | 354 |
492 | 355 --exome Use only the exome or not |
493 <help><![CDATA[ | 356 |
494 | 357 --bed=FILE BED file containing the set of regions to be used |
495 **SigProfiler** | 358 in generating the matrices |
496 | 359 |
497 This script configures the SigProfiler analysis pipeline. | 360 --chrom Create the matrices on a per chromosome basis |
498 You must specify a VCF file for at least one sample. | 361 |
499 | 362 --plot Generate the plots for each context |
500 | 363 |
501 **Options** | 364 --tsb Performs a transcriptional strand bias test for the |
502 --version show program's version number and exit | 365 24, 384, and 6144 contexts |
503 | 366 |
504 -h, --help show this help message and exit | 367 --gs Performs a gene strand bias test |
505 | 368 |
506 --install_genome Install de novo any of the following reference | 369 For further info see: https://github.com/AlexandrovLab/SigProfilerMatrixGenerator |
507 genomes: 'GRCh37', 'GRCh38', 'mm9' or 'mm10'. | 370 |
508 | 371 ]]></help> |
509 --name=APPENDIX Provide a project name | 372 |
510 | 373 <citations> |
511 --genome=NAME Provide a reference genome (ex: GRCh37, GRCh38, | 374 <citation type="doi">10.1186/s12864-019-6041-2</citation> |
512 mm9 or mm10). | 375 </citations> |
513 | |
514 --files=Abs_path Path where the input vcf files are located | |
515 | |
516 --exome Use only the exome or not | |
517 | |
518 --bed=FILE BED file containing the set of regions to be used | |
519 in generating the matrices | |
520 | |
521 --chrom Create the matrices on a per chromosome basis | |
522 | |
523 --plot Generate the plots for each context | |
524 | |
525 --tsb Performs a transcriptional strand bias test for the | |
526 24, 384, and 6144 contexts | |
527 | |
528 --gs Performs a gene strand bias test | |
529 | |
530 For further info see: https://github.com/AlexandrovLab/SigProfilerMatrixGenerator | |
531 | |
532 ]]></help> | |
533 | |
534 <citations> | |
535 <citation type="doi">10.1186/s12864-019-6041-2</citation> | |
536 </citations> | |
537 | 376 |
538 </tool> | 377 </tool> |