comparison salsa2.xml @ 4:9a22227bb6d0 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/salsa2 commit 41b8952cd0739db0b5eb62d5dd9ccc7bc0f1925f
author iuc
date Thu, 19 May 2022 14:17:24 +0000
parents f77f7a7f3b83
children 58a870ef434c
comparison
equal deleted inserted replaced
3:f77f7a7f3b83 4:9a22227bb6d0
1 <tool id="salsa" name="SALSA" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01"> 1 <tool id="salsa" name="SALSA" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01">
2 <description>scaffold long read assemblies with Hi-C</description> 2 <description>scaffold long read assemblies with Hi-C</description>
3 <macros>
4 <token name="@TOOL_VERSION@">2.3</token>
5 <token name="@VERSION_SUFFIX@">3</token>
6 </macros>
3 <xrefs> 7 <xrefs>
4 <xref type="bio.tools">SALSA</xref> 8 <xref type="bio.tools">SALSA</xref>
5 </xrefs> 9 </xrefs>
6 <macros>
7 <token name="@TOOL_VERSION@">2.3</token>
8 <token name="@VERSION_SUFFIX@">2</token>
9 </macros>
10 <requirements> 10 <requirements>
11 <requirement type="package" version="@TOOL_VERSION@">salsa2</requirement> 11 <requirement type="package" version="@TOOL_VERSION@">salsa2</requirement>
12 <requirement type="package" version="1.11">samtools</requirement> 12 <requirement type="package" version="1.11">samtools</requirement>
13 </requirements> 13 </requirements>
14 <command detect_errors="exit_code"><![CDATA[ 14 <command detect_errors="exit_code"><![CDATA[
37 -g '$gfa_file' 37 -g '$gfa_file'
38 #end if 38 #end if
39 #if $iter: 39 #if $iter:
40 -i '$iter' 40 -i '$iter'
41 #end if 41 #end if
42 #if $gensize:
43 -s '$gensize'
44 #end if
45 -m '$clean'
42 -o ./out 46 -o ./out
43 ]]></command> 47 ]]></command>
44 <inputs> 48 <inputs>
45 <param name="fasta_in" type="data" format="fasta" label="Initial assembly file" help="Headers must not contain ':'."/> 49 <param name="fasta_in" type="data" format="fasta" label="Initial assembly file" help="Headers must not contain ':'."/>
46 <param name="bed_file" type="data" format="bed" label="Bed alignment" help="To start scaffolding with SALSA, reads need to be mapped to the assembly. 50 <param name="bed_file" type="data" format="bed" label="Bed alignment" help="To start scaffolding with SALSA, reads need to be mapped to the assembly.
47 BWA or BOWTIE2 are recommended. SALSA requires a bed file as the input. The alignment bam file can be converted using the bamToBed command from 51 BWA or BOWTIE2 are recommended. SALSA requires a bed file as the input. The alignment bam file can be converted using the bamToBed command from
48 the Bedtools package."/> 52 the Bedtools package."/>
49 <param name="cutoff" argument="-c" type="integer" min="1" label="Cutoff" optional="true" help="Minimum contig length to scaffold"/> 53 <param name="cutoff" argument="-c" type="integer" min="1" label="Cutoff" optional="true" help="Minimum contig length to scaffold"/>
50 <param name="gfa_file" argument="-g" type="data" format="gfa1,gfa2" optional="true" label="Sequence graphs" 54 <param name="gfa_file" argument="-g" type="data" format="gfa1,gfa2" optional="true" label="Sequence graphs"
51 help="An assembly graph can be optionally provided to guide the scaffolding, potentially reducing the scaffolding errors"/> 55 help="An assembly graph can be optionally provided to guide the scaffolding, potentially reducing the scaffolding errors"/>
52 <conditional name="enzyme_conditional"> 56 <conditional name="enzyme_conditional">
53 <param name="enzyme_options" type="select" label="Enzyme selection" help="Hi-C experiments can use different restriction enzymes. 57 <param name="enzyme_options" type="select" label="Enzyme selection" help="Hi-C experiments can use different restriction enzymes.
54 The enzyme frequency in contigs is used to normalize the Hi-C interaction frequency. Note that you need to specify the actual 58 The enzyme frequency in contigs is used to normalize the Hi-C interaction frequency. Note that you need to specify the actual
55 sequence of the cutting site for a restriction enzyme and not the enzyme name. You can also specify DNASE as an enzyme if you 59 sequence of the cutting site for a restriction enzyme and not the enzyme name. You can also specify DNASE as an enzyme if you
56 use an enzyme-free prep, e.g. Omin-C."> 60 use an enzyme-free prep, e.g. Omin-C.">
57 <option value="preconfigured">Preconfigured restriction enzymes</option> 61 <option value="preconfigured">Preconfigured restriction enzymes</option>
58 <option value="specific">Enter a specific sequence</option> 62 <option value="specific">Enter a specific sequence</option>
59 </param> 63 </param>
60 <when value="preconfigured"> 64 <when value="preconfigured">
69 help="Restriction enzyme sequence. If multiple were used, include all as a comma separated list without spaces (ex. 'GATC,AAGCTT')."> 73 help="Restriction enzyme sequence. If multiple were used, include all as a comma separated list without spaces (ex. 'GATC,AAGCTT').">
70 <validator type="expression" message="Only alphabetical letters and the comma can be used in to define restriction enzym sequences.">value.replace(',', '').isalpha()</validator> 74 <validator type="expression" message="Only alphabetical letters and the comma can be used in to define restriction enzym sequences.">value.replace(',', '').isalpha()</validator>
71 </param> 75 </param>
72 </when> 76 </when>
73 </conditional> 77 </conditional>
74 <param name="iter" argument="-i" type="integer" min="0" max="20" label="Iterations" optional="true" 78 <param name="iter" argument="-i" type="integer" min="0" max="20" label="Iterations" optional="true"
75 help="SALSA will scaffold through sequential iterations. The default number of iterations is 3. Increasing the number of iterations will 79 help="SALSA will scaffold through sequential iterations. The default number of iterations is 3. Increasing the number of iterations will
76 potentially increase the number of joins, however it could also introduce additional misjoins"/> 80 potentially increase the number of joins, however it could also introduce additional misjoins"/>
81 <param name="clean" argument="-m" type="boolean" label="Clean Assembly" checked="false" truevalue='yes' falsevalue="no" help="Set this option to 'yes' if you want to find misassemblies in input assembly" />
82 <param name="gensize" argument="-s" type="integer" label="Expected Genome Size" optional="true" help="Expected Genome size of the assembled genome. If not set, Salsa will estimate genome size." />
77 </inputs> 83 </inputs>
78 <outputs> 84 <outputs>
79 <data name="scaffolds_fasta" format="fasta" from_work_dir="out/scaffolds_FINAL.fasta" label="${tool.name} on ${on_string}: FASTA assembly"/> 85 <data name="scaffolds_fasta" format="fasta" from_work_dir="out/scaffolds_FINAL.fasta" label="${tool.name} on ${on_string}: FASTA assembly"/>
80 <data name="scaffolds_agp" format="tabular" from_work_dir="out/scaffolds_FINAL.agp" label="${tool.name} on ${on_string}: agp output"/> 86 <data name="scaffolds_agp" format="tabular" from_work_dir="out/scaffolds_FINAL.agp" label="${tool.name} on ${on_string}: agp output"/>
81 </outputs> 87 </outputs>
85 <param name="length" value="test.fai"/> 91 <param name="length" value="test.fai"/>
86 <param name="bed_file" value="test.bed"/> 92 <param name="bed_file" value="test.bed"/>
87 <param name="gfa_file" value="test.gfa1"/> 93 <param name="gfa_file" value="test.gfa1"/>
88 <conditional name="enzyme_conditional"> 94 <conditional name="enzyme_conditional">
89 <param name="enzyme_options" value="specific"/> 95 <param name="enzyme_options" value="specific"/>
90 <param name="manual_enzyme" value="GATC,GANTC"/> 96 <param name="manual_enzyme" value="GATC,GANTC"/>
91 </conditional> 97 </conditional>
92 <param name="enzyme" value="GATC,GANTC"/> 98 <param name="enzyme" value="GATC,GANTC"/>
93 <param name="cutoff" value="1000"/> 99 <param name="cutoff" value="1000"/>
94 <param name="iter" value="3"/> 100 <param name="iter" value="3"/>
101 <param name="clean" value="yes"/>
95 <output name="scaffolds_fasta" file="out.fasta"/> 102 <output name="scaffolds_fasta" file="out.fasta"/>
96 <output name="scaffolds_agp" file="out.agp"/> 103 <output name="scaffolds_agp" file="out.agp"/>
97 </test> 104 </test>
98 <!--Test manual enzyme--> 105 <!--Test manual enzyme-->
99 <test> 106 <test>
100 <param name="fasta_in" value="test.fasta"/> 107 <param name="fasta_in" value="test.fasta"/>
101 <param name="bed_file" value="test.bed"/> 108 <param name="bed_file" value="test.bed"/>
102 <param name="gfa_file" value="test.gfa1"/> 109 <param name="gfa_file" value="test.gfa1"/>
103 <conditional name="enzyme_conditional"> 110 <conditional name="enzyme_conditional">
104 <param name="enzyme_options" value="specific"/> 111 <param name="enzyme_options" value="specific"/>
105 <param name="manual_enzyme" value="GATC,GANTC"/> 112 <param name="manual_enzyme" value="GATC,GANTC"/>
106 </conditional> 113 </conditional>
107 <param name="cutoff" value="1000"/> 114 <param name="cutoff" value="1000"/>
108 <param name="iter" value="3"/> 115 <param name="iter" value="3"/>
116 <param name="clean" value="yes"/>
109 <output name="scaffolds_fasta" file="out.fasta"/> 117 <output name="scaffolds_fasta" file="out.fasta"/>
110 <output name="scaffolds_agp" file="out.agp"/> 118 <output name="scaffolds_agp" file="out.agp"/>
111 </test> 119 </test>
112 <!--Test predefined enzyme--> 120 <!--Test predefined enzyme-->
113 <test> 121 <test>
114 <param name="fasta_in" value="test.fasta"/> 122 <param name="fasta_in" value="test.fasta"/>
115 <param name="bed_file" value="test.bed"/> 123 <param name="bed_file" value="test.bed"/>
116 <param name="gfa_file" value="test.gfa1"/> 124 <param name="gfa_file" value="test.gfa1"/>
117 <conditional name="enzyme_conditional"> 125 <conditional name="enzyme_conditional">
118 <param name="enzyme_options" value="preconfigured"/> 126 <param name="enzyme_options" value="preconfigured"/>
119 <param name="preconfigured_enzymes" value="arima1"/> 127 <param name="preconfigured_enzymes" value="arima1"/>
120 </conditional> 128 </conditional>
121 <param name="cutoff" value="1000"/> 129 <param name="cutoff" value="1000"/>
122 <param name="iter" value="3"/> 130 <param name="iter" value="3"/>
131 <param name="clean" value="yes"/>
123 <output name="scaffolds_fasta" file="out.fasta"/> 132 <output name="scaffolds_fasta" file="out.fasta"/>
124 <output name="scaffolds_agp" file="out.agp"/> 133 <output name="scaffolds_agp" file="out.agp"/>
125 </test> 134 </test>
126 </tests> 135 </tests>
127 <help><![CDATA[ 136 <help><![CDATA[
136 145
137 .. class:: infomark 146 .. class:: infomark
138 147
139 **Mapping reads** 148 **Mapping reads**
140 149
141 To start the scaffolding, the first step is to map reads to the assembly. We recommend using `BWA <https://usegalaxy.eu/root?tool_id=toolshed.g2.bx.psu.edu/repos/devteam/bwa/bwa_mem/0.7.17.2>`_ 150 To start the scaffolding, the first step is to map reads to the assembly. We recommend using `BWA <https://usegalaxy.eu/root?tool_id=toolshed.g2.bx.psu.edu/repos/devteam/bwa/bwa_mem/0.7.17.2>`_
142 or `BOWTIE2 <https://usegalaxy.eu/root?tool_id=toolshed.g2.bx.psu.edu/repos/devteam/bowtie2/bowtie2/2.4.2+galaxy0>`_ aligner to map reads. The read mapping generates a bam file. SALSA requires 151 or `BOWTIE2 <https://usegalaxy.eu/root?tool_id=toolshed.g2.bx.psu.edu/repos/devteam/bowtie2/bowtie2/2.4.2+galaxy0>`_ aligner to map reads. The read mapping generates a bam file. SALSA requires
143 a BED file as the input. The bam file can be converted using the bamToBed command from the `Bedtools package <http://bedtools.readthedocs.io/en/latest/>`_. Also, SALSA requires BED files to be sorted by the 152 a BED file as the input. The bam file can be converted using the bamToBed command from the `Bedtools package <http://bedtools.readthedocs.io/en/latest/>`_. Also, SALSA requires BED files to be sorted by the
144 read name, rather than the alignment coordinates. Once you have a bam file, convert it with bamToBed and sort the result by read name to get the BED file needed as an input to SALSA. 153 read name, rather than the alignment coordinates. Once you have a bam file, convert it with bamToBed and sort the result by read name to get the BED file needed as an input to SALSA.
145 154
146 Since Hi-C reads and alignments contain experimental artifacts, the alignments need some postprocessing. To align and postprocess 155 Since Hi-C reads and alignments contain experimental artifacts, the alignments need some postprocessing. To align and postprocess
147 the alignments, you can use the pipeline released by Arima Genomics which can be found in the `GitHub repository <https://github.com/ArimaGenomics>`_. 156 the alignments, you can use the pipeline released by Arima Genomics which can be found in the `GitHub repository <https://github.com/ArimaGenomics>`_.
148 157
149 Additional information on how to generate/filter the bam file can be found `here <https://github.com/marbl/SALSA#mapping-reads>`_. 158 Additional information on how to generate/filter the bam file can be found `here <https://github.com/marbl/SALSA#mapping-reads>`_.
150 159
151 ]]></help> 160 ]]></help>