Mercurial > repos > iuc > salsa

--- a/salsa2.xml	Thu Sep 23 19:36:12 2021 +0000
+++ b/salsa2.xml	Thu Nov 11 15:03:17 2021 +0000
@@ -5,7 +5,7 @@
     </xrefs>
     <macros>
         <token name="@TOOL_VERSION@">2.3</token>
-        <token name="@VERSION_SUFFIX@">1</token>
+        <token name="@VERSION_SUFFIX@">2</token>
     </macros>
     <requirements>
         <requirement type="package" version="@TOOL_VERSION@">salsa2</requirement>
@@ -43,11 +43,17 @@
     ]]></command>
     <inputs>
         <param name="fasta_in" type="data" format="fasta" label="Initial assembly file" help="Headers must not contain ':'."/>
-        <param name="bed_file" type="data" format="bed" label="Bed alignment" help="Sorted by read names"/>
-        <param name="cutoff" argument="-c" type="integer" min="1" label="Cutoff" optional="true" help="Minimum contig length to scaffold."/>
-        <param name="gfa_file" argument="-g" type="data" format="gfa1,gfa2" optional="true" label="Sequence graphs" help="Sequence graphs encoded in GFA format."/>
+        <param name="bed_file" type="data" format="bed" label="Bed alignment" help="To start scaffolding with SALSA, reads need to be mapped to the assembly.
+            BWA or BOWTIE2 are recommended. SALSA requires a bed file as the input. The alignment bam file can be converted using the bamToBed command from
+            the Bedtools package."/>
+        <param name="cutoff" argument="-c" type="integer" min="1" label="Cutoff" optional="true" help="Minimum contig length to scaffold"/>
+        <param name="gfa_file" argument="-g" type="data" format="gfa1,gfa2" optional="true" label="Sequence graphs"
+            help="An assembly graph can be optionally provided to guide the scaffolding, potentially reducing the scaffolding errors"/>
         <conditional name="enzyme_conditional">
-            <param name="enzyme_options" type="select" label="Enzyme selection" help="TODO">
+            <param name="enzyme_options" type="select" label="Enzyme selection" help="Hi-C experiments can use different restriction enzymes.
+                The enzyme frequency in contigs is used to normalize the Hi-C interaction frequency. Note that you need to specify the actual
+                sequence of the cutting site for a restriction enzyme and not the enzyme name. You can also specify DNASE as an enzyme if you
+                use an enzyme-free prep, e.g. Omin-C.">
                 <option value="preconfigured">Preconfigured restriction enzymes</option>
                 <option value="specific">Enter a specific sequence</option>
             </param>
@@ -65,7 +71,9 @@
                 </param>
             </when>
         </conditional>
-        <param name="iter" argument="-i" type="integer" min="0" label="Iterations" optional="true" help="Number of iterations to run"/>
+        <param name="iter" argument="-i" type="integer" min="0" max="20" label="Iterations" optional="true"
+            help="SALSA will scaffold through sequential iterations. The default number of iterations is 3. Increasing the number of iterations will
+                potentially increase the number of joins, however it could also introduce additional misjoins"/>
     </inputs>
     <outputs>
         <data name="scaffolds_fasta" format="fasta" from_work_dir="out/scaffolds_FINAL.fasta" label="${tool.name} on ${on_string}: FASTA assembly"/>
@@ -117,11 +125,29 @@
         </test>
     </tests>
     <help><![CDATA[
-**What is does**
+.. class:: infomark
+
+**Purpose**

 SALSA (Simple AssembLy ScAffolder) is a scaffolding tool based on a computational method that exploits the genomic proximity
 information in Hi-C data sets for long range scaffolding of de novo genome assemblies.

+----
+
+.. class:: infomark
+
+**Mapping reads**
+
+To start the scaffolding, first step is to map reads to the assembly. We recommend using `BWA <https://usegalaxy.eu/root?tool_id=toolshed.g2.bx.psu.edu/repos/devteam/bwa/bwa_mem/0.7.17.2>`_
+or `BOWTIE2 <https://usegalaxy.eu/root?tool_id=toolshed.g2.bx.psu.edu/repos/devteam/bowtie2/bowtie2/2.4.2+galaxy0>`_ aligner to map reads. The read mapping generates a bam file. SALSA requires
+BED file as the input. This can be done using the bamToBed command from the `Bedtools package <http://bedtools.readthedocs.io/en/latest/>`_. Also, SALSA requires BED files to be sorted by the
+read name, rather than the alignment coordinates. Once you have bam file, you can run following commands to get the bam file needed as an input to SALSA.
+
+Since Hi-C reads and alignments contain experimental artifacts, the alignments needs some postprocessing. To align and postprocess
+the alignments, you can use the pipeline released by Arima Genomics which can be found in the `GitHub repository <https://github.com/ArimaGenomics>`_.
+
+Additional information on how to generate/filter the bam `here <https://github.com/marbl/SALSA#mapping-reads>`_.
+
     ]]></help>
     <citations>
         <citation type="doi">10.1101/261149</citation>