diff sigmut.xml @ 1:02861b32a62f draft default tip

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sigmut commit bba3eb3950b8772758cc6f19747172be7413ddd9"
author artbio
date Sun, 14 Jun 2020 20:27:29 -0400
parents 9f48c5d97be8
children
line wrap: on
line diff
--- a/sigmut.xml	Mon Mar 16 06:15:02 2020 -0400
+++ b/sigmut.xml	Sun Jun 14 20:27:29 2020 -0400
@@ -1,538 +1,377 @@
 <tool id="SigProfiler" name="SigProfiler" version="@VERSION@">
-	<description>SigProfiler performs the mutational signature characterization from VCF files.</description>
-
-	<macros>
-		<import>sigmut_macros.xml</import>
-	</macros>
-	<expand macro="requirements"/>
-	<expand macro="stdio"/>
-	<command detect_errors="exit_code"><![CDATA[
-		@VERSION@
-		@pipefail@
-		#import os
-		#import random
-		#import datetime
-                #set job_dir=os.getcwd()
-		#set run_dir = job_dir + (' ' + str(random.randint(1,100000))).strip() + '/'
-		#set job_num = "Job_" + (' ' + str(random.randint(1,500))).strip()
-		
-		mkdir $run_dir &&
-
-		#if str( $set_analysis.choices ) == "install_genome":
-		ln -s -f '$__tool_directory__/install.log' '${logref}' &&
-		#else if str( $set_analysis.choices ) == "get_sigmut":
-		#set err_file = $run_dir + "logs/SigProfilerMatrixGenerator_" + $job_num + "_" + str($set_analysis.refgendat) + str(datetime.date.today()) + ".err"
-		#set log_file = $run_dir + "logs/SigProfilerMatrixGenerator_" + $job_num + "_" + str($set_analysis.refgendat) + str(datetime.date.today()) + ".out"
-		
-		#set plot1 = $run_dir + "output/plots/SBS_6_plots_" + $job_num + ".pdf" 
-		#set plot2 = $run_dir + "output/plots/SBS_24_plots_" + $job_num + ".pdf"
-		#set plot3 = $run_dir + "output/plots/SBS_78_plots_" + $job_num + ".pdf"
-		#set plot4 = $run_dir + "output/plots/SBS_96_plots_" + $job_num + ".pdf"
-		#set plot5 = $run_dir + "output/plots/SBS_384_plots_" + $job_num + ".pdf"
-		#set plot6 = $run_dir + "output/plots/SBS_1536_plots_" + $job_num + ".pdf"
-		#set plot7 = $run_dir + "output/plots/DBS_78_plots_" + $job_num + ".pdf"
-		#set plot8 = $run_dir + "output/plots/DBS_186_plots_" + $job_num + ".pdf"
-
-		#set plot9 = $run_dir + "output/plots/ID_simple_plots_" + $job_num + ".pdf"
-		#set plot10 = $run_dir + "output/plots/ID_TSB_plots_" + $job_num + ".pdf"
-		#set plot11 = $run_dir + "output/plots/ID_83_plots_" + $job_num + ".pdf"
-		#set plot12 = $run_dir + "output/plots/ID_94_plots_" + $job_num + ".pdf"
-		#set plot13 = $run_dir + "output/plots/ID_96_plots_" + $job_num + ".pdf"
-		
-		#set exo1 = $run_dir + "output/DBS/" + $job_num + ".DBS78" + ".exome" 
-		#set exo2 = $run_dir + "output/DBS/" + $job_num + ".DBS186" + ".exome"
-		#set exo3 = $run_dir + "output/DBS/" + $job_num + ".DBS1248" + ".exome"
-		#set exo4 = $run_dir + "output/DBS/" + $job_num + ".DBS2976" + ".exome"
-
-		#set exo5 = $run_dir + "output/SBS/" + $job_num + ".SBS6" + ".exome"
-		#set exo6 = $run_dir + "output/SBS/" + $job_num + ".SBS24" + ".exome"
-		#set exo7 = $run_dir + "output/SBS/" + $job_num + ".SBS96" + ".exome"
-		#set exo8 = $run_dir + "output/SBS/" + $job_num + ".SBS384" + ".exome"
-		#set exo9 = $run_dir + "output/SBS/" + $job_num + ".SBS1536" + ".exome"
-		#set exo10 = $run_dir + "output/SBS/" + $job_num + ".SBS6144" + ".exome"
-
-		#set exo11 = $run_dir + "output/vcf_files/DBS/" + $job_num + "_" + "DBS_exome.vcf"
-		#set exo12 = $run_dir + "output/vcf_files/SNV/" + $job_num + "_" + "SNV_exome.vcf"
-		
-		#set tsb1  = $run_dir + "output/TSB/strandBiasTest_24.txt"
-		#set tsb2  = $run_dir + "output/TSB/strandBiasTest_384.txt"
-		#set tsb3  = $run_dir + "output/TSB/strandBiasTest_6144.txt"
-		#set tsb4  = $run_dir + "output/TSB/significantResults_strandBiasTest.txt"
-
-		#set seqinf1 = $run_dir + "output/DBS/" + $job_num + ".DBS78" + ".all" 
-		#set seqinf2 = $run_dir + "output/DBS/" + $job_num + ".DBS186" + ".all"
-		#set seqinf3 = $run_dir + "output/DBS/" + $job_num + ".DBS1248" + ".all"
-		#set seqinf4 = $run_dir + "output/DBS/" + $job_num + ".DBS2976" + ".all"
-
-		#set seqinf5 = $run_dir + "output/SBS/" + $job_num + ".SBS6" + ".all"
-		#set seqinf6 = $run_dir + "output/SBS/" + $job_num + ".SBS24" + ".all"
-		#set seqinf7 = $run_dir + "output/SBS/" + $job_num + ".SBS96" + ".all"
-		#set seqinf8 = $run_dir + "output/SBS/" + $job_num + ".SBS384" + ".all"
-		#set seqinf9 = $run_dir + "output/SBS/" + $job_num + ".SBS1536" + ".all"
-		#set seqinf10 = $run_dir + "output/SBS/" + $job_num + ".SBS6144" + ".all"
-
-		#set seqinf11 = $run_dir + "output/ID/" + $job_num + ".ID28" + ".all"
-		#set seqinf12 = $run_dir + "output/ID/" + $job_num + ".ID83" + ".all"
-		#set seqinf13 = $run_dir + "output/ID/" + $job_num + ".ID94" + ".all"
-		#set seqinf14 = $run_dir + "output/ID/" + $job_num + ".ID96" + ".all"
-		#set seqinf15 = $run_dir + "output/ID/" + $job_num + ".ID415" + ".all"
-		#set seqinf16 = $run_dir + "output/ID/" + $job_num + ".ID8628" + ".all"
-
-		ln -s -f '$log_file' '${logsmt}' &&
-
-		#if str($set_analysis.plot) == "true":
-		ln -s -f '$plot1' '${SBS6}' &&
-		ln -s -f '$plot2' '${SBS24}' &&
-		ln -s -f '$plot3' '${SBS78}' &&
-		ln -s -f '$plot4' '${SBS96}' &&
-		ln -s -f '$plot5' '${SBS384}' &&
-		ln -s -f '$plot6' '${SBS1536}' &&
-		ln -s -f '$plot7' '${DBS78}' &&
-		ln -s -f '$plot8' '${DBS186}' &&
-		ln -s -f '$plot9' '${ID_simple}' &&
-		ln -s -f '$plot10' '${ID_TSB}' &&
-		ln -s -f '$plot11' '${ID_83}' &&
-		ln -s -f '$plot12' '${ID_94}' &&
-		ln -s -f '$plot13' '${ID_96}' &&
-		
-		ln -s -f '$seqinf1' '${SeqInf1}' &&
-                ln -s -f '$seqinf2' '${SeqInf2}' &&
-                ln -s -f '$seqinf3' '${SeqInf3}' &&
-                ln -s -f '$seqinf4' '${SeqInf4}' &&
-                ln -s -f '$seqinf5' '${SeqInf5}' &&
-                ln -s -f '$seqinf6' '${SeqInf6}' &&
-                ln -s -f '$seqinf7' '${SeqInf7}' &&
-                ln -s -f '$seqinf8' '${SeqInf8}' &&
-                ln -s -f '$seqinf9' '${SeqInf9}' &&
-                ln -s -f '$seqinf10' '${SeqInf10}' &&
-		
-                ln -s -f '$seqinf11' '${SeqInf11}' &&
-                ln -s -f '$seqinf12' '${SeqInf12}' &&
-                ln -s -f '$seqinf13' '${SeqInf13}' &&
-                ln -s -f '$seqinf14' '${SeqInf14}' &&
-		ln -s -f '$seqinf15' '${SeqInf15}' &&
-		ln -s -f '$seqinf16' '${SeqInf16}' &&
-		
-		#end if
+    <description>performs mutational signature characterization from variant files</description>
 
-		#if str($set_analysis.exome) == "true":
-		ln -s -f '$exo1' '${Exo1}' &&
-		ln -s -f '$exo2' '${Exo2}' &&
-		ln -s -f '$exo3' '${Exo3}' &&
-		ln -s -f '$exo4' '${Exo4}' &&
-		ln -s -f '$exo5' '${Exo5}' &&
-		ln -s -f '$exo6' '${Exo6}' &&
-		ln -s -f '$exo7' '${Exo7}' &&
-		ln -s -f '$exo8' '${Exo8}' &&
-		ln -s -f '$exo9' '${Exo9}' &&
-		ln -s -f '$exo10' '${Exo10}' &&
-		ln -s -f '$exo11' '${Exo11}' &&
-		ln -s -f '$exo12' '${Exo12}' &&
-		#end if
-
-		#if str($set_analysis.tsb_stat) == "true":
-                ln -s -f '$tsb1' '${TSB24}' &&
-                ln -s -f '$tsb2' '${TSB384}' &&
-                ln -s -f '$tsb3' '${TSB6144}' &&
-		ln -s -f '$tsb4' '${sigRes}' &&
-		#end if
-
-
-		#if str($set_analysis.seqInfo) == "true":
-		ln -s -f '$seqinf1' '${SeqInf1}' &&
-		ln -s -f '$seqinf2' '${SeqInf2}' &&
-		ln -s -f '$seqinf3' '${SeqInf3}' &&
-		ln -s -f '$seqinf4' '${SeqInf4}' &&
-		ln -s -f '$seqinf5' '${SeqInf5}' &&
-		ln -s -f '$seqinf6' '${SeqInf6}' &&
-		ln -s -f '$seqinf7' '${SeqInf7}' &&
-		ln -s -f '$seqinf8' '${SeqInf8}' &&
-		ln -s -f '$seqinf9' '${SeqInf9}' &&
-		ln -s -f '$seqinf10' '${SeqInf10}' &&
-		ln -s -f '$seqinf11' '${SeqInf11}' &&
-                ln -s -f '$seqinf12' '${SeqInf12}' &&
-                ln -s -f '$seqinf13' '${SeqInf13}' &&
-                ln -s -f '$seqinf14' '${SeqInf14}' &&
-		ln -s -f '$seqinf15' '${SeqInf15}' &&
-		ln -s -f '$seqinf16' '${SeqInf16}' &&
-		#end if
-
-
-		#if str( $set_analysis.vcfile_input.vcfile ) == "maf":
-                #set $infile = $run_dir + 'snps.maf'
+    <macros>
+        <import>sigmut_macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="stdio"/>
+    <command detect_errors="exit_code"><![CDATA[
+        @VERSION@
+        @pipefail@
+        BIN=`which sigprofiler | sed 's,/sigprofiler,,g'` &&
+        echo \$BIN &&
+        chmod -R 777 \$BIN &&
+        mkdir run_dir &&
+        #if str( $set_analysis.choices ) == "get_sigmut":
+            #if str( $set_analysis.vcfile_input.vcfile ) == "maf":
+                #set $infile = 'run_dir/snps.maf'
                 ln -s -f '$set_analysis.vcfile_input.maf_file' '$infile' &&
-                #else if str( $set_analysis.vcfile_input.vcfile ) == "icgc":
-		#set $infile = $run_dir + 'snps.txt'
-		ln -s -f '$set_analysis.vcfile_input.icgc_file' '$infile' &&
-                #else if str( $set_analysis.vcfile_input.vcfile ) == "vcf":
-                #set $infile = $run_dir + 'snps.vcf'
+            #else if str( $set_analysis.vcfile_input.vcfile ) == "icgc":
+                #set $infile = 'run_dir/snps.txt'
+		        ln -s -f '$set_analysis.vcfile_input.icgc_file' '$infile' &&
+            #else if str( $set_analysis.vcfile_input.vcfile ) == "vcf":
+                #set $infile = 'run_dir/snps.vcf'
                 ln -s -f '$set_analysis.vcfile_input.vcf_file' '$infile' &&
-		#end if
+            #end if
+        #end if
+        
+        sigprofiler
+        
+        #if str( $set_analysis.choices ) == "install_genome":
+            -ig $set_analysis.refgendwn > install.log
+        #else if str( $set_analysis.choices ) == "get_sigmut":
+            -g $set_analysis.refgendat
+            -f 'run_dir'
+            -n "project"
+            -p
+## ! implement exome functionality when good test available
+##            #if str( $set_analysis.exome ) == "true":
+##                -e
+##            #end if
+## ! implement per chromosome functionality when good test available
+##            #if str( $set_analysis.chrom_based ) == "true":
+##                -c
+##            #end if
+            #if str( $set_analysis.tsb_stat ) == "true":
+                -t
+            #end if
+            #if str( $set_analysis.gs ) == "true":
+                -s
+            #end if
+            ##-b $set_analysis.bed ### to be done
+            && pdfcombine -f -s -o blinder.pdf run_dir/output/plots/*.pdf
+            && ls run_dir/logs/
+            #if str( $set_analysis.tsb_stat ) == "true":
+                && tail -n +1 run_dir/output/TSB/*.txt > transcriptional_strand_biases.txt
+            #end if
+            #if str( $set_analysis.seqInfo ) == "true":
+                && tail -n +1 run_dir/output/*/*.all > information.txt
+            #end if
+        #end if
+        ]]></command>
 
-		#end if
-		
-		sigprofiler	
-		#if str( $set_analysis.choices ) == "install_genome":
-		-ig $set_analysis.refgendwn
-		#else if str( $set_analysis.choices ) == "get_sigmut":
-		-n $job_num
-		-g $set_analysis.refgendat
-		-f $run_dir
-
-		#if str( $set_analysis.exome ) == "true":
-		-e
-		#end if
-		#if str( $set_analysis.chrom_based ) == "true":
-		-c
-		#end if
-		#if str( $set_analysis.plot ) == "true":
-		-p
-		#end if
-		#if str( $set_analysis.tsb_stat ) == "true":
-		-t
-		#end if
-		#if str( $set_analysis.gs ) == "true":
-		-s
-		#end if
-		##-b $set_analysis.bed
-		#end if
-		]]></command>
+    <inputs>
+        <conditional name="set_analysis">
+            <param name="choices" type="select" label="Which of the following jobs do you want to perform?">
+                <option value="install_genome">Install 'de novo' a reference genome</option>
+                <option value="get_sigmut">Obtain the mutational signatures from variant files (MAF, ICGC or VCF)</option>
+            </param>
+            <when value="install_genome">
+                <param name="refgendwn" type="select" label="Reference genome" help="Get data from any of the following reference genomes:">
+                    <option value="GRCh37">Homo sapiens, GRCh37.p13 [GCA_000001405.14] </option>
+                    <option value="GRCh38">Homo sapiens, GRCh38.p12 [GCA_000001405.27] </option>
+                    <option value="mm9">Mus musculus, GRCm37 [GCA_000001635.18]</option>
+                    <option value="mm10">Mus musculus, GRCm38.p6 [GCA_000001635.8]</option>
+                    <option value="rn6">Rattus norvegicus, Rnor_6.0 [GCA_000001895.4]</option>
+                    <option value="c_elegans">Caenorhabditis elegans</option>
+                    <option value="dog">Dog</option>
+                </param>
+            </when>
 
-	<inputs>
-		<conditional name="set_analysis">
-			<param name="choices" type="select" label="Which of the following jobs do you want perform?">
-				<option value="install_genome">Install 'de novo' a reference genome </option>
-				<option value="get_sigmut">Obtain the mutational signatures from VCF files</option>
-			</param>
-			<when value="install_genome">
-				<param name="refgendwn" type="select" label="Reference genome" help="Get data from any of the following reference genomes:">
-					<option value="GRCh37">Homo sapiens, GRCh37.p13 [GCA_000001405.14] </option>
-					<option value="GRCh38">Homo sapiens, GRCh38.p12 [GCA_000001405.27] </option>
-					<option value="mm9">Mus musculus, GRCm37 [GCA_000001635.18]</option>
-					<option value="mm10">Mus musculus, GRCm38.p6 [GCA_000001635.8]</option>
-					<option value="rn6">Rattus norvegicus, Rnor_6.0 [GCA_000001895.4]</option>
-					<option value="c_elegans">Caenorhabditis elegans</option>
-                                        <option value="dog">Dog</option>
-				</param>
-			</when>
+            <when value="get_sigmut">
+                <conditional name="vcfile_input">
+                    <param name="vcfile" type="select" label="VC file" help="Select the format of your input data">
+                        <option value="maf">Mutation Annotation Format</option>
+                        <option value="icgc">Tab-separated file</option>
+                        <option value="vcf">Variant Call Format</option>
+                    </param>
+                    <when value='maf'>
+                        <param name="maf_file" type="data" format="maf" label="select VC file" help="Select the input file in MAF format." />
+                    </when>
+                    <when value='icgc'>
+                        <param name="icgc_file" type="data" format="txt" label="select VC file" help="Select the input file in ICGC format." />
+                    </when>
+                    <when value='vcf'>
+                        <param name="vcf_file" type="data" format="vcf" label="select VC file" help="Select the input file in VCF format." />
+                    </when>
+                </conditional>
 
-			<when value="get_sigmut">
-				<conditional name="vcfile_input">
-					<param name="vcfile" type="select" label="VC file" help="Select the format of your input data">
-						<option value="maf">Mutation Annotation Format</option>
-						<option value="icgc">Tab-separated file</option>
-						<option value="vcf">Variant Call Format</option>
-					</param>
-					<when value='maf'>
-						<param name="maf_file" type="data" format="maf" label="select VC file" help="Select the input file in MAF format." />
-					</when>
-					<when value='icgc'>
-						<param name="icgc_file" type="data" format="txt" label="select VC file" help="Select the input file in ICGC format." />
-					</when>
-					<when value='vcf'>
-						<param name="vcf_file" type="data" format="vcf" label="select VC file" help="Select the input file in VCF format." />
-					</when>
-				</conditional>
-
-				<param name="refgendat" type="select" label="Reference genome to be analyzed" help="Use the following reference genome:">
-					<option value="GRCh37">Homo sapiens, GRCh37.p13 [GCA_000001405.14] </option>
+                <param name="refgendat" type="select" label="Reference genome to be analyzed" help="Use the following reference genome:">
+                    <option value="GRCh37">Homo sapiens, GRCh37.p13 [GCA_000001405.14] </option>
                                         <option value="GRCh38">Homo sapiens, GRCh38.p12 [GCA_000001405.27] </option>
                                         <option value="mm9">Mus musculus, GRCm37 [GCA_000001635.18]</option>
                                         <option value="mm10">Mus musculus, GRCm38.p6 [GCA_000001635.8]</option>
                                         <option value="rn6">Rattus norvegicus, Rnor_6.0 [GCA_000001895.4]</option>
                                         <option value="c_elegans">Caenorhabditis elegans</option>
                                         <option value="dog">Dog</option>
-				</param>
+                </param>
 
-				<conditional name="bed_input">
-					<param name="bedfile" type="select" label="BED file" help="Input a BED file">
-						<option value="yes">Yes</option>
-						<option value="no" selected="true">No</option>
-					</param>
-					<when value='yes'>
-						<param name="bed_file" format="bed" type="data" label="Use a BED file containing the set of regions" help="Provide a BED file"/>
-					</when>
-					<when value='no'>
-					</when>
-				</conditional>
-				<param name="plot" type="boolean" truevalue="true" label="Produce plot results?" checked="False" help="Show plots"/>
-				<param name="exome" type="boolean" label="Use only the exome?" checked="False" help="Use exome"/>
-				<param name="chrom_based" type="boolean" label="Create the matrices on a per chromosome basis?" checked="False" help="Show snvs"/>
-				<param name="tsb_stat" type="boolean" truevalue="true" label="Performs a transcriptional strand bias test?" checked="False" help="Show snvs"/>
-				<param name="seqInfo" type="boolean" truevalue="true" label="Export sequence information?" checked="False" help="Show sequence information"/>
-				<param name="gs" type="boolean" label="Performs gene strand bias test?" checked="False" help="Show snvs"/>
-			</when>
-		</conditional>
-	</inputs>
-
-	<outputs>
-		<data format="txt" name="logref" label="Log file: Install a Reference Genome">
-			<filter>set_analysis['choices'] == 'install_genome'</filter>
-		</data>
-
-                <data format="txt" name="logsmt" label="Log file: Calculate Mutational Signatures">
-                        <filter>set_analysis['choices'] == 'get_sigmut'</filter>
-                </data>
-
-		<data format="pdf" name="SBS6" label="--> Plot SBS 6 Sig. Mut.">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['plot'] is True</filter>
-		</data>
+<!-- implement bed when test available -->
+<!--                <conditional name="bed_input">
+                    <param name="bedfile" type="select" label="BED file" help="Input a BED file">
+                        <option value="yes">Yes</option>
+                        <option value="no" selected="true">No</option>
+                    </param>
+                    <when value='yes'>
+                        <param name="bed_file" format="bed" type="data" label="Use a BED file containing the set of regions" help="Provide a BED file"/>
+                    </when>
+                    <when value='no'>
+                    </when>
+                </conditional> -->
+                <!-- implement exome functionality when test available -->
+                <!-- <param name="exome" type="boolean" label="Use only the exome?" checked="False" help="Use exome"/> -->
+                <!-- implement chrom_based functionality when test available -->
+                <!--<param name="chrom_based" type="boolean" label="Create the matrices on a per chromosome basis?" checked="False" help="Show snvs"/> -->
+                <param name="tsb_stat" type="boolean" truevalue="true" label="Perform a transcriptional strand bias test?" checked="False" help="Runs sigprofiler with the -t option and returns the concatenated strand bias tables"/>
+                <param name="seqInfo" type="boolean" truevalue="true" label="Export sequence information?" checked="False" help="Show sequence information"/>
+                <param name="gs" type="boolean" truevalue="true" label="Perform a gene strand bias test?" checked="False" help="Runs sigprofiler with the -s option"/>
+            </when>
+        </conditional>
+    </inputs>
 
-		<data format="pdf" name="SBS24" label="--> Plot SBS 24 Sig. Mut.">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['plot'] is True</filter>
-		</data>
-
-		<data format="pdf" name="SBS78" label="--> Plot SBS 78 Sig. Mut.">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['plot'] is True</filter>
-		</data>
+    <outputs>
+        <data format="txt" name="logref" label="Log file: Install a Reference Genome"
+              from_work_dir="./install.log">
+            <filter>set_analysis['choices'] == 'install_genome'</filter>
+        </data>
+        <data format="txt" name="logsmt" label="Log file: Calculate Mutational Signatures"
+              from_work_dir="run_dir/logs/SigProfilerMatrixGenerator*.out">
+            <filter>set_analysis['choices'] == 'get_sigmut'</filter>
+        </data>
 
-		<data format="pdf" name="SBS96" label="--> Plot SBS 96 Sig. Mut.">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['plot'] is True</filter>
-		</data>
-
-		<data format="pdf" name="SBS384" label="--> Plot SBS 384 Sig. Mut.">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['plot'] is True</filter>
-		</data>
+        <!-- plots are always produced: the command passes -p unconditionally and the tool declares no "plot" input -->
+        <data format="pdf" name="blinder" label="SBS Mutational Signatures plots (pdf)" from_work_dir="./blinder.pdf">
+            <filter>set_analysis['choices'] == 'get_sigmut'</filter>
+        </data>
 
-		<data format="pdf" name="SBS1536" label="--> Plot SBS 1536 Sig. Mut.">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['plot'] is True</filter>
-		</data>
-
-		<data format="pdf" name="DBS78" label="--> Plot DBS 78 Sig. Mut.">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['plot'] is True</filter>
-		</data>
+        <!-- implement exome outputs when test available -->
+        <!--
+        <data format="txt" name="dbs_exome" label="DBS_exome.vcf">
+            <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter>
+        </data>
+        <data format="txt" name="snv_exome" label="SNV_exome.vcf">
+            <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter>
+        </data>
+        
+        <data format="txt" name="sig_exome" label="DBS 78 and so on Sig. Mut. EXOME">
+            <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter>
+        </data>
+        -->
+        <data format="txt" name="tsb" label="Transcriptional Strand Biases"
+                      from_work_dir="./transcriptional_strand_biases.txt" >
+            <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['tsb_stat'] is True</filter>
+        </data>
 
-		<data format="pdf" name="DBS186" label="--> Plot DBS 186 Sig. Mut.">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['plot'] is True</filter>
-		</data>
-
-		<data format="pdf" name="ID_simple" label="--> Plot ID simple Sig. Mut.">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['plot'] is True</filter>
-		</data>
-
-		<data format="pdf" name="ID_TSB" label="--> Plot ID TSB Sig. Mut.">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['plot'] is True</filter>
-		</data>
+        <data format="txt" name="seqinfo" label="Mutational Signature detailed infos"
+              from_work_dir="./information.txt" >
+            <filter>set_analysis['choices'] == 'get_sigmut' and  set_analysis['seqInfo'] is True</filter>
+        </data>
 
-		<data format="pdf" name="ID_83" label="--> Plot ID 83 Sig. Mut.">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['plot'] is True</filter>
-		</data>
-
-		<data format="pdf" name="ID_94" label="--> Plot ID 94 Sig. Mut.">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['plot'] is True</filter>
-		</data>
-
-		<data format="pdf" name="ID_96" label="--> Plot ID 96 Sig. Mut.">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['plot'] is True</filter>
-		</data>
-		
-		<data format="txt" name="Exo11" label="--> DBS_exome.vcf">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter>
-		</data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="choices" value="install_genome"/>
+            <param name="refgendwn" value="GRCh38"/>
+            <output name="logref" file="hg38_install.log" lines_diff="5"/>
+        </test>
+        <test>
+            <param name="choices" value="get_sigmut"/>
+            <param name="refgendat" value="GRCh38"/>
+            <param name="vcfile" value="vcf"/>
+            <param name="vcf_file" ftype="vcf" value="hg38.vcf"/>
+            <!-- no "plot" parameter is set here: the tool declares none and always runs sigprofiler with -p -->
+            <output name="logsmt" ftype="txt" file="sigmut.log" lines_diff="5" />
+            <output name="blinder" file="hg38_blinder.pdf"  lines_diff="5" />
+        </test>
+    </tests>
 
-		<data format="txt" name="Exo12" label="--> SNV_exome.vcf">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter>
-		</data>
-
-		<data format="txt" name="Exo1" label="--> DBS 78 Sig. Mut. EXOME">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter>
-		</data>
+    <help><![CDATA[
 
-		<data format="txt" name="Exo2" label="--> DBS 186 Sig. Mut. EXOME">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter>
-		</data>
+        **SigProfiler**
 
-		<data format="txt" name="Exo3" label="--> DBS 1248 Sig. Mut. EXOME">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter>
-		</data>
-
-		<data format="txt" name="Exo4" label="--> DBS 2976 Sig. Mut. EXOME">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter>
-		</data>
+        Background:
 
-		<data format="txt" name="Exo5" label="--> SBS 6 Sig. Mut. EXOME">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter>
-		</data>
-
-		<data format="txt" name="Exo6" label="--> SBS 24 Sig. Mut. EXOME">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter>
-		</data>
+        Cancer genomes evince somatic mutations, which are imprinted by
+        different mutational processes, that give rise to diverse
+        mutational signatures. Their analysis from single base
+        substitutions and their immediate sequencing context, allows the
+        classification of small mutational events (including
+        substitutions, insertions, deletions, and doublet substitutions)
+        for better understanding the mutational processes that have
+        shaped a cancer genome.
 
-		<data format="txt" name="Exo7" label="--> SBS 96 Sig. Mut. EXOME">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter>
-		</data>
-
-		<data format="txt" name="Exo8" label="--> SBS 384 Sig. Mut. EXOME">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter>
-		</data>
+        In this sense, SigProfiler constitutes a Galaxy-based wrapper of
+        a computational method developed by Ludmil B. Alexandrov that
+        allows the exploration and visualization of mutational patterns
+        for all types of small mutational events. Specifically, the
+        following actions can be performed using SigProfiler wrapper:
 
-		<data format="txt" name="Exo9" label="--> SBS 1536 Sig. Mut. EXOME">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter>
-		</data>
-
-		<data format="txt" name="Exo10" label="--> SBS 6144 Sig. Mut. EXOME">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter>
-		</data>
+        1. Identify and categorize the mutations based on possible
+        single nucleotide variants (SNVs), doublet base substitutions
+        (DBSs), and insertions/deletions, and provide further
+        transcriptional strand bias categorization. Afterwards, the
+        classifications of these mutations are integrated into distinct
+        matrices.
+        SigProfiler provides matrix generation support for SBS-6,
+        SBS-96, SBS-1536, DBS-78 and DBS-1248. In addition,
+        mutational matrices of indels, including ID-28 and ID-83,
+        are generated, as well as an ID-8628 matrix that extends the
+        ID-83 classification.
+        SigProfiler examines transcriptional strand bias for single base
+        substitutions, doublet base substitutions, and small indels. It
+        is evaluated whether a mutation occurs on the transcribed or the
+        non-transcribed strand of well-annotated protein coding genes of
+        a reference genome. Mutations found in the transcribed regions
+        of the genome are further subclassified as: (i) transcribed,
+        (ii) un-transcribed, (iii) bi-directional, or (iv) unknown.
+           
+        2. Generation of plots of all types of mutational signatures as
+        well as all types of mutational patterns in cancer genomes.  
 
-		<data format="txt" name="sigRes" label="--> TSB: Significant Results">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['tsb_stat'] is True</filter>
-		</data>
-
-		<data format="txt" name="TSB24" label="--> TSB: 24 Sig. Mut.">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['tsb_stat'] is True</filter>
-		</data>
-
-		<data format="txt" name="TSB384" label="--> TSB: 96 Sig. Mut.">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['tsb_stat'] is True</filter>
-		</data>
+        Additional Information:
 
-                <data format="txt" name="TSB6144" label="--> TSB: 1536 Sig. Mut.">
-                        <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['tsb_stat'] is True</filter>
-                </data>
-
-
-		<data format="txt" name="SeqInf1" label="--> DBS 78 Sig. Mut. ALL">
-			<filter>set_analysis['choices'] == 'get_sigmut' and  set_analysis['seqInfo'] is True</filter>
-		</data>
-
-		<data format="txt" name="SeqInf2" label="--> DBS 186 Sig. Mut. ALL">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter>
-		</data>
-
-		<data format="txt" name="SeqInf3" label="--> DBS 1248 Sig. Mut. ALL">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter>
-		</data>
-
-		<data format="txt" name="SeqInf4" label="--> DBS 2976 Sig. Mut. ALL">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter>
-		</data>
-
-		<data format="txt" name="SeqInf5" label="--> SBS 6 Sig. Mut. ALL">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter>
-		</data>
-
-		<data format="txt" name="SeqInf6" label="--> SBS 24 Sig. Mut. ALL">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter>
-		</data>
-
-		<data format="txt" name="SeqInf7" label="--> SBS 96 Sig. Mut. ALL">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter>
-		</data>
+        Classification of Single Base substitutions (SBSs):
+        A single base substitution (SBS) is a single DNA base-pair
+        substituted with another single DNA base-pair. The most
+        basic classification catalogues SBSs into six distinct
+        categories, including: C:G > A:T, C:G > G:C, C:G > T:A,
+        T:A > A:T, T:A > C:G, and T:A > G:C. In practice, a C:G > A:T
+        substitution is denoted as either a C > A mutation using the
+        pyrimidine base or as a G > T mutation using the purine base.
+        In consequence, the most commonly used SBS-6 classification of
+        single base substitutions can be written as: C > A, C > G,
+        C > T, T > A, T > C, and T > G.
+        Additionally, the SBS-6 classification can be further
+        expanded by considering the base-pairs immediately
+        adjacent 5′ and 3′ to the somatic mutation. Therefore, an
+        extended classification for analysis of mutational signatures is
+        SBS-96, where each of the classes in SBS-6 is further elaborated
+        using one base adjacent at the 5′ of the mutation and one base
+        adjacent at the 3′ of the mutation.
+        Logically, SBS-96 can be further elaborated by including
+        additional 5′ and 3′ adjacent context. Each of the six single
+        base substitutions in SBS-6 has 256 possible pentanucleotides
+        resulting in a classification with 1536 possible channels.
+           
+        Classification of Doublet Base substitutions (DBSs):
+        Doublet base substitutions (DBSs) are somatic mutations in which
+        a set of two adjacent DNA base-pairs is simultaneously
+        substituted with another set of two adjacent DNA base-pairs. An
+        example of a DBS is a set of CT:GA base-pairs mutating to a set
+        of AA:TT base-pairs, which is usually denoted as CT:GA > AA:TT.
+        It should be noted that a CT:GA > AA:TT mutation can be
+        equivalently written as either a CT > AA mutation or an AG > TT
+        mutation (its reverse complement). Overall, the
+        basic classification catalogues DBSs into 78 distinct categories
+        denoted as the DBS-78 matrix.
+        Similarly, we can expand the characterization of DBS mutations
+        by considering the 5′ and 3′ adjacent contexts. With
+        seventy-eight possible DBS mutations having sixteen possible
+        tetranucleotides each, this context expansion results in 1248
+        possible channels denoted as the DBS-1248 context.
+        
+        Classification of small insertions and deletions (IDs):
+        A somatic insertion is the incorporation of a set of base-pairs
+        that lengthens a chromosome, while a somatic deletion is the
+        removal of a set of existing base-pairs from a given location
+        of a chromosome.
+        Unfortunately, indel classification cannot be performed
+        analogously to SBS or DBS classifications, where the immediate
+        sequence context flanking each mutation is
+        utilized to subclassify these mutational events.
+        Consequently, indels (IDs) are classified as single base-pair
+        or longer events. They can be further subclassified as either a
+        C:G or a T:A indel, while longer indels can also be
+        subclassified based on their lengths: 2 bp, 3 bp, 4 bp, and
+        5+ bp.
 
-		<data format="txt" name="SeqInf8" label="--> SBS 384 Sig. Mut. ALL">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter>
-		</data>
-
-		<data format="txt" name="SeqInf9" label="--> SBS 1536 Sig. Mut. ALL">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter>
-		</data>
-
-		<data format="txt" name="SeqInf10" label="--> SBS 6144 Sig. Mut. ALL">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter>
-		</data>
-
-
-		<data format="txt" name="SeqInf11" label="--> ID 28 Sig. Mut. ALL">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter>
-		</data>
-
-		<data format="txt" name="SeqInf12" label="--> ID 83 Sig. Mut. ALL">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter>
-		</data>
-
-		<data format="txt" name="SeqInf13" label="--> ID 94 Sig. Mut. ALL">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter>
-		</data>
-
-		<data format="txt" name="SeqInf14" label="--> ID 96 Sig. Mut. ALL">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter>
-		</data>
-
-		<data format="txt" name="SeqInf15" label="--> ID 415 Sig. Mut. ALL">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter>
-		</data>
-
-		<data format="txt" name="SeqInf16" label="--> ID 8628 Sig. Mut. ALL">
-			<filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter>
-		</data>
-
-	</outputs>
+        Incorporation of Transcriptional Strand Bias (TSB):
+        The mutational classifications described above allow the
+        characterization of mutational patterns of single base
+        substitutions, doublet base substitutions, and small insertions
+        and deletions. Nevertheless, these classifications can be
+        further elaborated by incorporating strand bias. Mutations
+        of the same type are expected to be equally distributed across the two
+        DNA strands. However, in many cases an asymmetric number of mutations is
+        observed due to either one of the strands being preferentially
+        repaired or one of the strands having a higher propensity for
+        being damaged. To sub-classify mutations based on their
+        transcriptional strand bias, the pyrimidine orientation with
+        respect to the locations of well-annotated protein coding genes
+        on a genome is considered.
 
-	<tests>
-		<test>
-			<conditional name="set_analysis">
-				<param name="choices" value="install_genome"/>
-				<param name="refgendwn" ftype="fasta" value="c_elegans"/>
-			</conditional>
-			<output name="logref" file="c_elegans.log" lines_diff="5"/>
-		</test>
+        Running SigProfiler:
+        
+        1. Reference Genomes:
+        Before using SigProfiler, a reference genome must be
+        installed. By default, the tool supports the following
+        reference genomes:
 
+                Human: GRCh37 & GRCh38
+
+                Mouse: mm9 & mm10
+
+                Rat: rn6
+
+                Nematode: c_elegans
+
+                A valid command line looks like:
 
-		<test>
-			<conditional name="set_analysis">
-				<param name="choices" value="get_sigmut"/>
-				<param name="refgendat" ftype="fasta" value="c_elegans"/>
-				<conditional name="vcfile_input">
-					<param name="vcfile" value="icgc"/>
-					<param name="icgc_file" ftype="txt" value="test_matrix.txt"/>
-				</conditional>
-				<conditional name="bed_input">
-					<param name="bedfile" value="no"/>	
-				</conditional>
-				<param name="plot" value="True"/>
-			</conditional>
+                sigprofiler -ig GRCh37
 
-		<output name="ID_simple" file="ID_simple.pdf" lines_diff="5"/>
-		<output name="ID_TSB" file="ID_TSB.pdf" lines_diff="5"/>
-		<output name="ID_83" file="ID_83.pdf" lines_diff="5"/>
-
-		</test>
+        2. Mutational signatures calculation:
+        
+        After successful installation of a reference genome, SigProfiler
+        can be applied to files containing somatic mutations in multiple
+        formats, for transforming these mutational catalogues into mutational
+        matrices. Specifically, the tool can read data formats such as
+        Variant Call Format (VCF) and Mutation Annotation Format
+        (MAF) and the following parameters should be provided for
+        generating the diverse matrices and plots:
+        
+        --name | -n = Project name
+        --genome | -g = Reference Genome
+        --files | -f = Absolute path where the input mutation files are located
 
-	</tests>
+        A valid command line looks like:
 
-	<help><![CDATA[
-
-		**SigProfiler**
+        sigprofiler -n MYPROJECT -g GRCh37 -f /path_to_folder_with_VCF_files/ -p
 
-		This script configures the SigProfiler analysis pipeline.
-		You must specify a VCF file for at least one sample.
-
+        **Options**
+        --version               show program's version number and exit
 
-		**Options**
-		--version             	show program's version number and exit
+        -h, --help              show this help message and exit
 
-		-h, --help            	show this help message and exit
+        --install_genome    Install de novo any of the following reference
+                    genomes: 'GRCh37', 'GRCh38', 'mm9' or 'mm10'.
 
-		--install_genome	Install de novo any of the following reference
-					genomes: 'GRCh37', 'GRCh38', 'mm9' or 'mm10'.
-
-		--name=APPENDIX		Provide a project name
+        --name=APPENDIX     Provide a project name
 
-		--genome=NAME		Provide a reference genome (ex: GRCh37, GRCh38,
-					mm9 or mm10).
+        --genome=NAME       Provide a reference genome (ex: GRCh37, GRCh38,
+                    mm9 or mm10).
 
-		--files=Abs_path	Path where the input vcf files are located
+        --files=Abs_path    Path where the input vcf files are located
+
+        --exome         Use only the exome or not
 
-		--exome			Use only the exome or not
+        --bed=FILE      BED file containing the set of regions to be used
+                    in generating the matrices
+
+        --chrom         Create the matrices on a per chromosome basis
 
-		--bed=FILE		BED file containing the set of regions to be used
-					in generating the matrices
+        --plot          Generate the plots for each context
 
-		--chrom			Create the matrices on a per chromosome basis
-
-		--plot			Generate the plots for each context
+        --tsb           Performs a transcriptional strand bias test for the
+                    24, 384, and 6144 contexts
 
-		--tsb			Performs a transcriptional strand bias test for the
-					24, 384, and 6144 contexts
+        --gs            Performs a gene strand bias test
 
-		--gs			Performs a gene strand bias test
+        For further info see: https://github.com/AlexandrovLab/SigProfilerMatrixGenerator
 
-		For further info see: https://github.com/AlexandrovLab/SigProfilerMatrixGenerator
-
-		]]></help>
+        ]]></help>
 
-	<citations>
-		<citation type="doi">10.1186/s12864-019-6041-2</citation>
-	</citations>
+    <citations>
+        <citation type="doi">10.1186/s12864-019-6041-2</citation>
+    </citations>
 
 </tool>